In [47]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromLeague.isin(leagueList))&(transfers.duration>0)]
s=s.merge(countryInfo[['marketTier']],right_index=True,left_on='fromCountry',how='left')
ss=s.groupby(['season','marketTier'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x11100b290>

In [74]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromLeague.isin(leagueList))&(transfers.duration>0)]
s=s.merge(countryInfo[['marketTier']],right_index=True,left_on='fromCountry',how='left')
ss=s.groupby(['season','marketTier'])['durationWithLoans']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[74]:
<matplotlib.axes._subplots.AxesSubplot at 0x107b5fed0>

In [65]:
t=nonloans
leagueList
s=t[(t.season>1994)&(t.fromCountry.isin(uefa))&(t.duration>0)&(t.fromLeague.isin(leagueList))&(~t.fromLeague.isin(['GB1','IT1','FR1','L1','ES1']))]
s.groupby('season').mean()['duration'].plot.area()


Out[65]:
<matplotlib.axes._subplots.AxesSubplot at 0x110612250>

In [116]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)]
s=s.merge(countryInfo[['marketTier']],right_index=True,left_on='fromCountry',how='left')
ss=s.groupby(['season','marketTier'])['id']
(ss.count()).unstack().plot.line(figsize=(10,10))


Out[116]:
<matplotlib.axes._subplots.AxesSubplot at 0x113fe3310>

In [75]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)]
s=s.merge(countryInfo[['federation']],right_index=True,left_on='Nationality',how='left')
ss=s[s.federation.isin(['UEFA','CAF','CONMEBOL'])].groupby(['season','federation'])['durationWithLoans']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[75]:
<matplotlib.axes._subplots.AxesSubplot at 0x107c11b50>

In [114]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)&(transfers.fromCountry!=transfers.Nationality)]
s=s.merge(countryInfo[['federation']],right_index=True,left_on='Nationality',how='left')
ss=s[s.federation.isin(['UEFA','CAF','CONMEBOL'])].groupby(['season','federation'])['id']
(ss.count()).unstack().plot.line(figsize=(10,10))


Out[114]:
<matplotlib.axes._subplots.AxesSubplot at 0x1131a73d0>

In [77]:
transfers['fromTeamClass']=pd.Series(np.where(transfers.fromTeamId.isin(top100clubids),'top 100','the rest'))
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromLeague.isin(leagueList))&(transfers.duration>0)]
ss=s.groupby(['season','fromTeamClass'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x10dd88250>

In [111]:
transfers['fromTeamClass']=pd.Series(np.where(transfers.fromTeamId.isin(top100clubids),'top 100','the rest'))
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromLeague.isin(leagueList))&(transfers.duration>0)]
ss=s.groupby(['season','fromTeamClass'])['fromTeamId']
(ss.nunique()).unstack().plot.line(figsize=(10,10))


Out[111]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e45cc90>

In [79]:
transfers['fromLeagueLevel']=transfers.fromLeague.str.slice(start=-1)
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)&transfers['fromLeagueLevel'].isin(['1','2','3'])]
ss=s.groupby(['season','fromLeagueLevel'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[79]:
<matplotlib.axes._subplots.AxesSubplot at 0x10deb4790>

In [4]:
transfers['fromLeagueLevel']=transfers.fromLeague.str.slice(start=-1)
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)&transfers['fromLeagueLevel'].isin(['1','2','3'])]
ss=s.groupby(['season','fromLeagueLevel'])['id']
(ss.count()).unstack().plot.line(figsize=(10,10))


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a9def90>

In [81]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)&transfers['fromLeagueLevel'].isin(['1','2','3','4'])]
ss=s.groupby(['season','positionGroup'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[81]:
<matplotlib.axes._subplots.AxesSubplot at 0x1086a9850>

In [6]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)]
s['isForeign']=s.fromCountry!=s.Nationality
ss=s.groupby(['season','isForeign'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from IPython.kernel.zmq import kernelapp as app
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x1094d75d0>

In [13]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)]
s['isDomesticTransfer']=s.fromCountry==s.toCountry
ss=s.groupby(['season','isDomesticTransfer'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from IPython.kernel.zmq import kernelapp as app
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x1152e0d10>

In [16]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.fromLeague.isin(leagueList))&(transfers.duration>0)]
s=s.merge(DataFrame({'nationalityTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='Nationality',right_index=True)
ss=s[s.nationalityTier.isin([1,2,3,'CAF','CONMEBOL'])].groupby(['season','nationalityTier'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x10aa110d0>

In [19]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)]
s=s.merge(DataFrame({'fromMarketTier':countryInfo.marketTier.fillna(countryInfo.federation)}),how='left',left_on='fromCountry',right_index=True)
ss=s.groupby(['season','fromMarketTier'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x10883e750>

In [10]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)]
s['isForeign']=s.fromCountry!=s.Nationality
s2=s[s.fromCountry.isin(['Serbia','Croatia','Bosnia-Herzegovina'])]
#s2=s[s.fromCountry.isin(['Croatia'])]
ss=s2.groupby(['season','isForeign'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from IPython.kernel.zmq import kernelapp as app
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x109316dd0>

In [108]:
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)&transfers['fromLeagueLevel'].isin(['1','2','3','4'])]
ss=s.groupby(['season','positionGroup'])['id']
(ss.count()).unstack().plot.line(figsize=(10,10))


Out[108]:
<matplotlib.axes._subplots.AxesSubplot at 0x1144d0050>

In [65]:
transfers['isIntl']=transfers['intlCaps']>0
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)&transfers['fromLeagueLevel'].isin(['1','2','3','4'])]
ss=s.groupby(['season','isIntl'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[65]:
<matplotlib.axes._subplots.AxesSubplot at 0x10fdc4910>

In [107]:
transfers['isIntl']=transfers['intlCaps']>0
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)&transfers['fromLeagueLevel'].isin(['1','2','3','4'])]
ss=s.groupby(['season','isIntl'])['id']
(ss.count()).unstack().plot.line(figsize=(10,10))


Out[107]:
<matplotlib.axes._subplots.AxesSubplot at 0x11650ae90>

In [69]:
transfers['hasAgent']=(transfers['playersAgentId']>0)&(transfers['playersAgentId']!=96)
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)&transfers['fromLeagueLevel'].isin(['1','2','3','4'])]
ss=s.groupby(['season','hasAgent'])['duration']
(ss.describe()/366).unstack(level=0).swaplevel().ix['mean'].T.plot.line(figsize=(10,10))


Out[69]:
<matplotlib.axes._subplots.AxesSubplot at 0x130e4db50>

In [106]:
transfers['hasAgent']=(transfers['playersAgentId']>0)&(transfers['playersAgentId']!=96)
s=transfers[(transfers.age>23)&(transfers.season>1994)&(transfers.fromCountry.isin(uefa))&(transfers.duration>0)&transfers['fromLeagueLevel'].isin(['1','2','3','4'])]
ss=s.groupby(['season','hasAgent'])['id']
(ss.count()).unstack().plot.line(figsize=(10,10))


Out[106]:
<matplotlib.axes._subplots.AxesSubplot at 0x1144c8190>

In [112]:
s=nonloans[(nonloans.season>1994)&(nonloans.duration>0)&(nonloans.fromCountry.isin(uefa))&(nonloans.fromCountry!=nonloans.Nationality)&(nonloans.age>23)] #&(nonloans.fromLeague.isin(leagueList))]
ss=s.groupby(['fromCountry','season'])['durationWithLoans'].mean().unstack().sort_values(2015)
#pd.concat([ss[:5],ss[-5:]]).T.plot.line(figsize=(10,10))

def gini(list_of_values):
    sorted_list = sorted([x for x in list_of_values if x>0])[:200]
    height, area = 0, 0
    for value in sorted_list:
        height += value
        area += height - value / 2.
    fair_area = height * len(list_of_values) / 2.
    return (fair_area - area) / fair_area

def gini(list_of_values):
    return min(list_of_values)


before=ss[range(1996,2001)].mean(axis=1)
after=ss[range(2011,2016)].mean(axis=1)
changepct=((before-after)/before).fillna(0).sort_values()

def describe(t):
    return t.T.drop('season').dropna().describe().T

#ss .T.reset_index().groupby('season').apply(describe)[['25%','50%','mean','75%','min']].plot.line()

ss.ix[['Sweden','Bosnia-Herzegovina','Albania']].T.plot.line()


Out[112]:
<matplotlib.axes._subplots.AxesSubplot at 0x1142cd690>

In [150]:
big5=['GB1','ES1','L1','IT1','FR1']
tier2=['PO1','NL1','SC1','BE1']
north=['DK1','SE1','NO1','A1','C1','LUX1','FI1','IR1','IS1','WAL','NIR','FARO']
east=['LI1','ISR1','PL1','UNG1','MAL1','KAS1','TS1','SLO1','SL1','FI1','AZ1','EST1','LET1','ARM1','LI1','WER1','UKR1','RU1','TR1']
southeast=['GR1','RO1','ZYP1','KR1','SER1','BOS1','BU1','ALB1','MNE1','MAL1','MOL1','MAZ1']

def category(league):
    if league in big5:
        return '1 - big 5'
    elif league in tier2:
        return '2 - tier 2'
    elif league in north:
        return '3 - north'
    elif league in east:
        return '4 - east'
    elif league in southeast:
        return '5 - southeast'
    else:
        return 'WTF'
    
nonloans['category']=nonloans.fromLeague.apply(category)
nonloans[(nonloans.season>1994)&(nonloans['category']!='WTF')].groupby(['season','category'])['id'].count().unstack().plot.area()


Out[150]:
<matplotlib.axes._subplots.AxesSubplot at 0x114765c50>

In [149]:
print (1044+758+989)/(952.+722+614)
(2292+1829 )/(1189.+673 )


1.21984265734
Out[149]:
2.2132116004296454

In [122]:
s=transfers[(transfers.age>21)&(transfers.season>1994)&(transfers.duration>0)&(transfers.fromCountry.isin(uefa))]
ss=s.groupby(['season','Nationality'])['duration']
i=(ss.describe()).unstack(level=0).swaplevel().ix['count'].sum(axis=1).sort_values()[-30:].index #.T   #.plot.line(figsize=(10,10))
s2=ss.describe().unstack(level=0).ix[i].swaplevel().ix['mean'].sort_values(2010)
s2=(pd.concat([s2.iloc[:8],s2.iloc[-8:]])).T
(s2/366).rolling(window=3,center=False).mean().plot.line(figsize=(20,10),colormap='Dark2')


Out[122]:
<matplotlib.axes._subplots.AxesSubplot at 0x118a71510>

In [17]:
nonloans.columns


Out[17]:
Index([u'id', u'playerName', u'date', u'season', u'fromCountry', u'fromLeague',
       u'fromTeamId', u'fromTeamName', u'toCountry', u'toLeague', u'toTeamId',
       u'toTeamName', u'mv', u'fee', u'displayName', u'nameInHomeCountry',
       u'dateOfBirth', u'placeOfBirth', u'countryOfBirth', u'Age', u'Height',
       u'Nationality', u'nationality2', u'positionGroup', u'Position',
       u'mainPosition', u'sidePosition', u'Foot', u'nationalTeamId',
       u'intlCaps', u'intlGoals', u'playersAgent', u'playersAgentId',
       u'currentClub', u'currentClubId', u'inTheTeamSince', u'contractUntil',
       u'onLoanFrom', u'onLoanFromClubId', u'contractThereUntil', u'2ndClub',
       u'2ndClubId', u'completeName', u'contractOption',
       u'dateOfLastContractExtension', u'Glove', u'Outfitter', u'shoeModel',
       u'shoeSize', u'dayOfDeath', u'facebook', u'instagram', u'twitter',
       u'website', u'feeValue', u'isLoan', u'age', u'ageFloat', u'period',
       u'mvDelta', u'duration', u'durationWithLoans'],
      dtype='object')

In [7]:
z=nonloans.merge(countryInfo[['Corruption']],right_index=True,left_on='fromCountry',how='left')

s=nonloans[(nonloans.facebook.notnull())]#|(nonloans.instagram.notnull())|(nonloans.twitter.notnull())|(nonloans.website.notnull())]
s=s.merge(countryInfo[['marketTier']],right_index=True,left_on='fromCountry',how='left')
s=s[(s.age>21)&(s.season>2000)&(s.duration>0)&(s.id.isin(z[z.Corruption<50]['id'].drop_duplicates().tolist()))] #&(s.feeValue.notnull())]

s=s.groupby('id').agg({'duration':'mean','fromTeamId': 'count'}).rename(columns={'fromTeamId':'transferCount'})
z=s[s.transferCount>3].sort_values('duration').reset_index().merge(nonloans[['id','Nationality','facebook','instagram','twitter','website']].drop_duplicates(), how='left') 
#z[z.Nationality=='Germany']
z #[:300].groupby('Nationality')['id'].count().sort_values().plot.pie()
#.hist(bins=60,figsize=(20,10))


Out[7]:
id duration transferCount Nationality facebook instagram twitter website
0 67064 190.333333 6 Costa Rica https://www.facebook.com/brownforbes https://instagram.com/feliciobrownforbes https://www.twitter.com/FelicioBF NaN
1 177331 236.400000 5 Croatia http://www.facebook.com/antonio.asanovic NaN NaN NaN
2 58380 249.000000 9 Slovenia https://www.facebook.com/pages/Patrik-Bordon/1... NaN NaN NaN
3 32701 251.727273 11 France http://www.facebook.com/joel.thomas.925 NaN NaN NaN
4 207562 260.857143 14 Portugal http://www.facebook.com/pages/Gustavo-Ribeiro/... NaN NaN http://www.GutiRibeiro.pt.vu
5 72831 260.857143 7 Bosnia-Herzegovina http://www.facebook.com/den.ci.5frefts NaN NaN NaN
6 289836 270.250000 4 Romania http://www.facebook.com/staikone.mariusmario NaN NaN NaN
7 64075 274.000000 4 Romania http://www.facebook.com/cristmjhg NaN NaN NaN
8 89386 297.000000 9 Turkey http://www.facebook.com/goksu.hasancik NaN http://twitter.com/GoksuHasancik http://www.goksuhasancik.com
9 172173 331.333333 6 Portugal http://www.facebook.com/ChicoGomes302817906415895 NaN NaN NaN
10 7561 340.352941 17 Ecuador https://de-de.facebook.com/ikaviedesweb NaN NaN NaN
11 48778 348.555556 9 Romania http://www.facebook.com/firtulescu.dragos NaN NaN NaN
12 200624 349.833333 6 Romania http://www.facebook.com/tinc.andrei NaN NaN NaN
13 48765 365.222222 9 Montenegro http://www.facebook.com/ilija.spasojevic http://instagram.com/spaso_87 http://twitter.com/Spaso_87 NaN
14 107849 365.250000 4 Serbia http://www.facebook.com/danijel.morariju NaN NaN NaN
15 214781 371.000000 4 Bosnia-Herzegovina http://www.facebook.com/esmir.e.ahmetovic NaN NaN NaN
16 94036 385.500000 6 Armenia https://www.facebook.com/hirac.yagan NaN NaN NaN
17 213699 387.000000 5 Romania http://www.facebook.com/profile.phpid100004142... NaN NaN NaN
18 18945 391.428571 7 France https://www.facebook.com/StevenThicot NaN NaN NaN
19 66008 411.750000 4 Belarus https://www.facebook.com/renanbressan10 NaN NaN NaN
20 34554 418.625000 8 Croatia http://www.facebook.com/aljosa.vojnovic http://www.instagram.com/aljosavojnovic/ NaN NaN
21 32476 426.000000 6 Switzerland http://www.facebook.com/FabianStoller194673330... http://www.instagram.com/fabianstoller1988/ NaN NaN
22 44376 435.000000 6 Italy https://www.facebook.com/christian.tiboni.9 NaN NaN NaN
23 48890 438.200000 5 Romania http://www.facebook.com/mihai.pintilii.1 NaN NaN NaN
24 81509 443.000000 7 Brazil http://https:www.facebook.comerigol27frefts http://https:www.instagram.comoliver27rj NaN NaN
25 34322 448.750000 4 Greece https://www.facebook.com/SokratisPapastathopoulos NaN NaN https://www.sokratispapastathopoulos.com/
26 105584 458.166667 6 Senegal http://www.facebook.com/jules.keita.756 NaN NaN NaN
27 5197 469.571429 7 Albania https://www.facebook.com/jurgengjasula10 https://www.instagram.com/gjizzii_10/ NaN https://jurgengjasula.com/
28 5879 473.166667 12 Romania https://www.facebook.com/ADRIAN.MUTU.OFFICIAL.... https://instagram.com/adrian10mutu https://twitter.com/adrianmutu https://www.adrianmutu.com
29 5876 492.444444 9 Brazil https://www.facebook.com/A10imperador https://www.instagram.com/adrianoimperador/ https://twitter.com/a10imperador NaN
... ... ... ... ... ... ... ... ...
65 35511 709.857143 7 Uruguay https://www.facebook.com/LoloEstoyanoff11 https://www.instagram.com/loloel11/ https://twitter.com/loloestoyanoff NaN
66 52513 733.000000 4 Brazil https://www.facebook.com/henriqueabuss NaN https://twitter.com/henriqueabuss NaN
67 27805 739.500000 4 Netherlands https://www.facebook.com/ELJEROELIA11 https://instagram.com/iameljero11elia https://twitter.com/ELJERO11ELIA https://www.eljeroelia.com
68 3373 743.285714 7 Brazil https://www.facebook.com/RonaldinhoOficial/frefts https://www.instagram.com/ronaldinhooficial/ https://twitter.com/10Ronaldinho https://www.ronaldinhogaucho.com
69 46423 781.250000 4 Croatia https://www.facebook.com/hrvoje.milic.330 NaN NaN NaN
70 36802 784.250000 4 Turkey https://www.facebook.com/manlikecolinkazimrich... https://www.instagram.com/manlikecolinkazimric... https://twitter.com/colin_kazim08 NaN
71 13389 786.571429 7 Cote d'Ivoire https://facebook.com/dzokora5 NaN https://twitter.com/dzokora5 https://www.didier-zokora.com/
72 4170 799.428571 7 Italy https://www.facebook.com/divaiomarco NaN https://twitter.com/divaio9 https://www.marcodivaio9.it/it/
73 3455 807.000000 5 Sweden https://www.facebook.com/ZlatanIbrahimovic https://www.instagram.com/iamzlatanibrahimovic/ https://twitter.com/ibra_official NaN
74 12906 812.600000 5 France https://www.facebook.com/pages/Michael-Ciani/2... https://instagram.com/mikeciani https://twitter.com/cianimichael NaN
75 24661 841.200000 5 Serbia https://www.facebook.com/MilanBisevacOfficiel_... NaN https://twitter.com/MilanBisevac NaN
76 123319 846.750000 4 Romania http://www.facebook.com/stratila.sorin.3 NaN NaN NaN
77 82508 867.750000 4 Slovakia http://www.facebook.com/pages/Libor-Hrdli%C4%8... NaN NaN NaN
78 95182 872.500000 4 Romania https://www.facebook.com/dragos.grigore.56 NaN NaN NaN
79 44514 881.600000 5 Italy https://www.facebook.com/federico.pioviccari NaN NaN NaN
80 5668 905.714286 7 France https://www.facebook.com/pages/Anthony-R%C3%A9... https://instagram.com/reveillereofficiel https://twitter.com/Reveillere_Off NaN
81 3408 914.142857 7 Uruguay https://www.facebook.com/DiegoForlanOficial https://www.instagram.com/diegoforlan_7 https://twitter.com/DiegoForlan7 https://www.diegoforlan.com/
82 22328 916.000000 6 Italy https://www.facebook.com/quagliarella27.it NaN https://twitter.com/FQuagliarella27 NaN
83 47490 949.600000 5 Romania http://www.facebook.com/traian.marc.9 NaN NaN NaN
84 19447 953.750000 4 Italy https://www.facebook.com/pages/Giampaolo-Pazzi... NaN NaN https://www.giampaolopazzini.eu/
85 15511 955.500000 4 Brazil https://www.facebook.com/r7oficial https://instagram.com/oficialrobinho https://twitter.com/oficialrobinho NaN
86 12149 961.800000 5 Albania https://www.facebook.com/lorikcana5/refprofile NaN NaN NaN
87 22185 1004.666667 6 Serbia https://www.facebook.com/vlada.avramov?fref=ts NaN NaN NaN
88 12563 1005.750000 4 Macedonia https://www.facebook.com/goranpandevpaginauffi... NaN NaN NaN
89 48003 1022.800000 5 Italy https://it-it.facebook.com/pages/Puggioni-Chri... NaN https://twitter.com/Puggio_1 NaN
90 34758 1027.500000 4 Romania https://www.facebook.com/pages/Banel-Nicolita/... https://www.instagram.com/banelnicolita NaN https://www.banel-16-nicolita.piczo.com
91 3817 1057.750000 4 Germany https://www.facebook.com/MalikFathiOfficial NaN https://twitter.com/MalikFathi https://www.Malik-Fathi.com/
92 6427 1324.250000 4 Italy https://www.facebook.com/MarcoDonadelOfficial NaN NaN https://www.marcodonadel.it/
93 19405 1448.500000 4 Romania http://www.facebook.com/Ionel-Danciulescu-4502... NaN NaN NaN
94 5796 1651.250000 4 Uruguay https://www.facebook.com/Chino20Recoba https://www.instagram.com/chino_recoba20 https://twitter.com/chino_recoba20 NaN

95 rows × 8 columns


In [6]:
nonloans[:1]


Out[6]:
id playerName date season fromCountry fromLeague fromTeamId fromTeamName toCountry toLeague ... twitter website feeValue isLoan age ageFloat period mvDelta duration durationWithLoans
0 175743 Craig Roddan 2009-07-01 2009 England Other England 12808 Wigan U18 England GBJG ... NaN NaN NaN False 16.0 15.69642 (2005, 2010] NaN NaN NaN

1 rows × 62 columns


In [8]:
nonloans.groupby('fromCountry')


Out[8]:
Corruption
Country
Germany 81.0
Spain 58.0
England 81.0
Portugal 63.0
Belgium 77.0
Italy 44.0
Netherlands 87.0
France 70.0
Russia 29.0
Switzerland 86.0
Austria 76.0
Croatia 51.0
Bosnia-Herzegovina 38.0
Ukraine 27.0
Czech Republic 56.0
Sweden 89.0
Poland 62.0
Romania 46.0
Slovakia 51.0
Hungary 51.0
Denmark 91.0
Turkey 42.0
Ireland 75.0
Greece 46.0
Norway 87.0
Slovenia 60.0
Iceland 79.0
Wales NaN
Israel 61.0
Scotland NaN
Albania 36.0
Montenegro 44.0
Northern Ireland NaN
Serbia 40.0
Finland 90.0
Bulgaria 41.0
Armenia 35.0
Estonia 70.0
Lithuania 61.0
Belarus 32.0
Georgia 52.0
Azerbaijan 29.0
Latvia 55.0
Cyprus 61.0
Moldova 33.0
Macedonia 42.0
Kazakhstan 28.0
Luxembourg 81.0
Liechtenstein NaN
Faroe Islands NaN
Malta 56.0
Andorra NaN
San Marino NaN
Gibraltar NaN

In [14]:
z=nonloans.merge(countryInfo[['Corruption']],right_index=True,left_on='fromCountry',how='left')

s=z[(z.facebook.notnull())]#|(z.instagram.notnull())|(z.twitter.notnull())|(z.website.notnull())]
s=s.merge(countryInfo[['marketTier']],right_index=True,left_on='fromCountry',how='left')
s=s[(s.age>21)&(s.season>2000)&(s.duration>0)&(s.duration<244)&(z.Corruption<50)&(z.fromCountry != 'Italy')] #&(s.feeValue.notnull())]

s=s.groupby('id').agg({'duration':'mean','fromTeamId': 'count'}).rename(columns={'fromTeamId':'transferCount'})
z=s[s.transferCount>0].sort_values('duration').reset_index().merge(nonloans[['id','playerName','Nationality','facebook','instagram','twitter','website']].drop_duplicates(), how='left') 
#z[z.Nationality=='Germany']
z #[:300].groupby('Nationality')['id'].count().sort_values().plot.pie()
#.hist(bins=60,figsize=(20,10))


Out[14]:
id duration transferCount playerName Nationality facebook instagram twitter website
0 19081 49.0 1 Lukasz Szukala Poland https://www.facebook.com/szukala?fref=pb&hc_lo... NaN NaN NaN
1 105584 52.0 1 Souleymane Keita Senegal http://www.facebook.com/jules.keita.756 NaN NaN NaN
2 32749 61.0 1 Ansi Agolli Albania http://www.facebook.com/AgolliAnsi/refprofile NaN NaN NaN
3 66008 99.5 2 Renan Bressan Belarus https://www.facebook.com/renanbressan10 NaN NaN NaN
4 213641 116.5 2 Ionut Cioinac Romania http://www.facebook.com/ionutadrian.cioinac NaN NaN NaN
5 67064 123.0 1 Felicio Brown Forbes Costa Rica https://www.facebook.com/brownforbes https://instagram.com/feliciobrownforbes https://www.twitter.com/FelicioBF NaN
6 44521 128.0 1 Kevin Großkreutz Germany https://www.facebook.com/Fischkreutz-617140571... https://www.instagram.com/fischkreutz/ https://twitter.com/fischkreutz_KG https://www.kevin-grosskreutz.de/
7 64075 141.0 1 Catalin Samoila Romania http://www.facebook.com/cristmjhg NaN NaN NaN
8 107849 152.0 1 Danijel Morariju Serbia http://www.facebook.com/danijel.morariju NaN NaN NaN
9 58380 152.5 2 Patrik Bordon Slovenia https://www.facebook.com/pages/Patrik-Bordon/1... NaN NaN NaN
10 32446 153.0 1 Juan Pablo Garat Argentina http://www.facebook.com/garatjuanfrefts http://www.instagram.com/garatjuanpablo/ NaN NaN
11 48778 154.2 5 Dragos Firtulescu Romania http://www.facebook.com/firtulescu.dragos NaN NaN NaN
12 47568 155.0 1 Stefan Barboianu Romania http://www.facebook.com/barboianu.stefan NaN NaN NaN
13 39101 155.5 2 Ilko Pirgov Bulgaria http://www.facebook.com/ilko.pirgov.26 NaN NaN NaN
14 2937 160.0 1 Helge Payer Austria http://www.facebook.com/helgepayer?fref=ts NaN NaN http://www.helgepayer.com/
15 214781 166.0 1 Esmir Ahmetovic Bosnia-Herzegovina http://www.facebook.com/esmir.e.ahmetovic NaN NaN NaN
16 41113 167.0 2 Jahmir Hyka Albania https://www.facebook.com/jahmirhykaofficial/re... NaN NaN NaN
17 81509 170.5 2 Eric Brazil http://https:www.facebook.comerigol27frefts http://https:www.instagram.comoliver27rj NaN NaN
18 177331 171.0 1 Antonio Asanovic Croatia http://www.facebook.com/antonio.asanovic NaN NaN NaN
19 9824 176.5 2 Hugo Almeida Portugal https://www.facebook.com/HugoAlmeidaOficial NaN https://twitter.com/almeida9_hugo NaN
20 99342 181.0 1 Edon Hasani Albania http://www.facebook.com/edonhasani8 NaN NaN NaN
21 48890 181.0 1 Mihai Pintilii Romania http://www.facebook.com/mihai.pintilii.1 NaN NaN NaN
22 116917 181.0 1 Elton Brazil http://www.facebook.com/elton.charles.37 NaN NaN NaN
23 289186 181.0 1 Iustin Popescu Romania http://www.facebook.com/iustin.popescu.7 NaN NaN NaN
24 48662 182.5 2 Vitinha Portugal http://www.facebook.com/vitinha.vitortiago NaN NaN NaN
25 207562 182.5 2 Guti Ribeiro Portugal http://www.facebook.com/pages/Gustavo-Ribeiro/... NaN NaN http://www.GutiRibeiro.pt.vu
26 238515 182.5 2 Gelu Velici Romania http://www.facebook.com/gelu.velici NaN NaN NaN
27 32701 182.5 2 Joël Thomas France http://www.facebook.com/joel.thomas.925 NaN NaN NaN
28 74996 183.0 1 Izet Hajrovic Bosnia-Herzegovina https://www.facebook.com/Izet.H.Official https://instagram.com/hajrovicizet14 NaN NaN
29 166145 184.0 1 Denis Cana Montenegro http://www.facebook.com/denis.cana NaN NaN NaN
30 25812 184.0 1 Emir Spahic Bosnia-Herzegovina https://www.facebook.com/Emir.Spahic.Official?... NaN NaN NaN
31 87296 189.0 2 Kristijan Naumovski Macedonia http://www.facebook.com/kiko.naumovski.5 NaN NaN NaN
32 289836 189.5 2 Marius Staicu Romania http://www.facebook.com/staikone.mariusmario NaN NaN NaN
33 58720 189.5 2 Srdjan Luchin Romania http://www.facebook.com/srgian.luchin.5 NaN NaN NaN
34 103523 190.5 4 Ioan Mera Romania https://www.facebook.com/ioan.mera NaN NaN NaN
35 213699 193.0 1 Victor Rimniceanu Romania http://www.facebook.com/profile.phpid100004142... NaN NaN NaN
36 111995 196.0 1 Hamza Younes Tunisia http://www.facebook.com/hamza.offishal NaN NaN NaN
37 94036 196.0 1 Hiraç Yagan Armenia https://www.facebook.com/hirac.yagan NaN NaN NaN
38 48765 198.0 2 Ilija Spasojević Montenegro http://www.facebook.com/ilija.spasojevic http://instagram.com/spaso_87 http://twitter.com/Spaso_87 NaN
39 72831 205.0 1 Denis Pozder Bosnia-Herzegovina http://www.facebook.com/den.ci.5frefts NaN NaN NaN
40 89386 207.5 2 Göksu Hasancik Turkey http://www.facebook.com/goksu.hasancik NaN http://twitter.com/GoksuHasancik http://www.goksuhasancik.com
41 34554 209.0 1 Aljosa Vojnovic Croatia http://www.facebook.com/aljosa.vojnovic http://www.instagram.com/aljosavojnovic/ NaN NaN
42 52769 209.0 1 Willian Brazil https://www.facebook.com/willianborges88 https://www.instagram.com/willianborges88/ https://twitter.com/willianborges88 https://www.willianshow.com.br
43 14262 215.0 1 Amoreirinha Portugal https://www.facebook.com/daniel.amoreirinha?fr... NaN NaN NaN
44 69402 215.0 1 Vlad Achim Romania http://www.facebook.com/profile.phpid100006256... NaN NaN NaN
45 48908 215.0 1 Szabolcs Szekely Romania http://www.facebook.com/szabi.szekely.7 NaN NaN NaN

In [6]:
def avgAge(t):
    allIn=t[t.toLeague.isin(leagueList)].groupby('toCountry')['ageFloat'].mean().sort_values()
    allOut=t[t.fromLeague.isin(leagueList)].groupby('fromCountry')['ageFloat'].mean().sort_values()
    freeIn=t[t.toLeague.isin(leagueList)&(t.feeValue==0)].groupby('toCountry')['ageFloat'].mean().sort_values()
    freeOut=t[t.fromLeague.isin(leagueList)&(t.feeValue==0)].groupby('fromCountry')['ageFloat'].mean().sort_values()
    paidIn=t[t.toLeague.isin(leagueList)&(t.feeValue>0)].groupby('toCountry')['ageFloat'].mean().sort_values()
    paidOut=t[t.fromLeague.isin(leagueList)&(t.feeValue>0)].groupby('fromCountry')['ageFloat'].mean().sort_values()
    z=DataFrame({'all imports':allIn,'all exports':allOut,'free imports':freeIn,'free exports':freeOut,'paid imports':paidIn,'paid exports':paidOut}).sort_values('all imports')
    z=z[['all imports', 'all exports', 'paid imports', 'paid exports', 'free imports', 'free exports']]
    z.columns.name='average age'
    return z.reset_index().rename(columns={'index':'country'})

output=nonloans[(nonloans.fromCountry!=nonloans.toCountry)&(nonloans.season>2000)].groupby('period').apply(avgAge).unstack(level=0).swaplevel(axis=1).sort_index(axis=1,level=0)
output.to_excel('../data/analysis/average_age.xlsx')
output


Out[6]:
period (2000, 2005] (2005, 2010] (2010, 2015]
average age country all imports all exports paid imports paid exports free imports free exports country all imports all exports ... paid exports free imports free exports country all imports all exports paid imports paid exports free imports free exports
0 Ireland 22.409149 23.529703 23.973114 20.932212 22.034243 24.249414 Ireland 22.778899 24.909037 ... 21.327122 22.812176 24.456324 Ireland 23.554922 24.701987 NaN 21.669165 23.981762 24.280163
1 Lithuania 23.829230 23.703737 20.531565 20.992217 23.684887 23.924329 Italy 23.516521 28.330525 ... 25.489691 25.748321 27.642635 Germany 23.981731 27.441849 22.971793 24.677944 26.513846 28.241884
2 Netherlands 23.860759 26.440232 22.631212 25.085274 24.584446 25.599119 Belgium 24.109426 25.864201 ... 23.901403 24.421822 25.629242 England 24.001993 27.161820 23.356485 25.398126 26.487572 26.595067
3 Belgium 23.941876 25.433670 23.640985 24.115485 23.573580 24.786286 England 24.424460 27.551235 ... 25.065190 26.543741 27.260546 Belgium 24.030334 25.777119 22.733017 23.926103 24.870562 25.668231
4 Italy 24.095842 27.364584 24.095550 25.825943 24.927842 27.510049 Bosnia-Herzegovina 24.452896 24.932264 ... 22.849398 24.597946 24.707228 Austria 24.045466 26.339036 21.710371 22.854283 24.905338 26.890001
5 Ukraine 24.148060 25.964625 22.266673 24.455594 24.702988 26.234762 Russia 24.505305 27.426174 ... 25.545174 26.675624 28.277178 Italy 24.201110 27.000105 23.137414 25.218059 25.749512 27.400262
6 Switzerland 24.224945 25.476489 24.863078 23.980625 24.379445 25.600939 Finland 24.526836 25.279253 ... 21.860292 24.558650 24.399504 Norway 24.208441 26.400276 22.459934 22.699114 26.065156 26.418249
7 Finland 24.267914 26.184582 21.514473 22.338583 24.571632 25.180812 Portugal 24.532531 26.700104 ... 25.298740 25.472552 26.727513 Croatia 24.361878 24.771819 23.684863 22.815716 24.729379 24.982493
8 Bosnia-Herzegovina 24.382172 24.302822 23.195548 22.997392 24.360264 23.642983 Luxembourg 24.534843 27.623328 ... 21.007960 24.323647 26.651817 Lithuania 24.377619 24.973603 NaN 21.888882 24.764587 24.838176
9 Kazakhstan 24.387747 26.002511 25.632285 NaN 23.913918 23.435571 Sweden 24.652319 26.162207 ... 23.913178 24.998683 25.363138 Slovenia 24.407468 25.310877 20.127267 22.994495 25.064999 25.299306
10 Czech Republic 24.447384 25.774304 23.630875 23.983697 25.135184 26.623009 Norway 24.692106 28.144113 ... 25.586689 25.771919 26.989482 Luxembourg 24.469735 26.856308 NaN NaN 24.555198 25.851512
11 Serbia 24.514504 24.165868 22.276068 22.795219 24.913908 24.097415 Germany 24.709120 28.156995 ... 25.111369 27.003786 28.494561 Netherlands 24.524977 26.023778 22.222347 23.944747 25.866896 25.811947
12 England 24.537455 27.843669 23.953204 25.649321 25.646439 27.253574 Netherlands 24.714199 26.809086 ... 24.351406 26.325168 26.570713 Bosnia-Herzegovina 24.572338 25.274883 20.909396 23.039073 24.969110 25.218014
13 France 24.566698 26.307493 23.308859 24.964672 25.531895 25.673118 Ukraine 24.724838 27.090178 ... 25.028186 25.151993 26.823437 Sweden 24.749753 26.119564 23.782889 23.248351 25.357918 25.415102
14 Russia 24.611994 26.930195 23.927922 25.999621 26.102658 27.408274 Lithuania 24.727558 25.127793 ... 23.633613 25.001270 24.451270 Slovakia 24.751372 25.196000 22.687393 22.428641 25.402174 25.000504
15 Israel 24.626967 26.219730 24.441296 23.966269 24.767302 26.413273 Scotland 24.740369 26.238900 ... 24.625470 25.172611 26.552903 Switzerland 24.778193 25.983267 23.658748 23.234306 25.745939 26.388071
16 Luxembourg 24.720274 28.542516 NaN NaN 24.587804 26.878088 Switzerland 24.798291 26.288892 ... 23.126915 25.638010 26.367364 Portugal 24.817382 26.533887 22.433790 24.545143 25.780144 26.784921
17 Sweden 24.787623 25.967872 24.235953 23.906772 24.750437 25.075685 Turkey 24.827375 27.207973 ... 27.291837 25.206001 26.506472 Russia 25.011891 27.803074 24.123154 25.963665 26.394202 27.985861
18 Portugal 24.924613 27.571309 23.112042 25.509853 25.262807 26.988766 Hungary 24.931577 26.137343 ... 23.169924 25.156836 26.269768 Hungary 25.083380 26.412664 22.310813 24.054242 25.166005 25.874893
19 Croatia 25.077151 25.980850 23.056828 23.679725 24.573040 25.767452 Spain 25.045056 28.469923 ... 26.004822 27.209875 29.057760 France 25.198720 26.792890 23.886058 24.827430 27.202886 26.250607
20 Slovenia 25.161065 26.145761 19.892263 22.833896 25.533990 25.176851 Slovenia 25.080848 25.838433 ... 22.546117 25.314474 25.947087 Ukraine 25.232376 27.163414 23.608117 25.010233 25.516110 26.863987
21 Spain 25.163496 28.838216 24.007526 25.594060 26.377030 28.348205 France 25.099319 26.590250 ... 25.372112 26.918598 26.430748 Finland 25.272122 25.811021 24.000493 22.057211 25.040640 24.979236
22 Turkey 25.167983 27.337123 25.016186 25.982655 25.181532 26.689714 Slovakia 25.159989 26.221233 ... 24.741192 26.037967 26.149911 Scotland 25.306615 25.519441 23.098606 22.773789 25.027889 25.524701
23 Malta 25.192212 26.565182 NaN 24.972450 23.074396 21.994520 Poland 25.233289 27.322139 ... 24.831507 26.285070 26.856149 Denmark 25.339676 26.391564 23.874381 23.919282 26.208138 26.195955
24 Bulgaria 25.205987 25.944442 23.664415 24.857247 26.388114 26.406566 Serbia 25.261508 25.371511 ... 23.676567 26.637815 25.323057 Poland 25.366199 26.999999 23.368247 23.918616 25.835413 26.982621
25 Norway 25.250481 26.943487 25.747379 24.171507 26.006059 25.572879 Romania 25.292255 26.764359 ... 25.518560 26.141811 26.513124 Malta 25.383827 26.809570 24.353683 23.600758 25.782369 26.645481
26 Scotland 25.263673 27.512516 25.100966 25.661597 25.242426 27.269532 Czech Republic 25.365890 26.002480 ... 24.049853 26.981546 26.718511 Bulgaria 25.434963 26.461156 23.844864 25.160301 25.759324 26.318231
27 Denmark 25.284908 26.967039 25.336983 24.920239 25.045710 26.991539 Denmark 25.387656 26.674637 ... 24.528796 25.314405 26.242230 Serbia 25.568841 25.359731 22.779624 21.943743 26.606096 25.838070
28 Germany 25.320799 29.129799 24.634123 27.430450 26.534835 28.314619 Croatia 25.459290 25.787442 ... 23.517658 26.187622 25.981922 Spain 25.577079 28.075527 23.953010 25.572849 27.698959 29.073419
29 United States 25.411523 26.151092 23.788305 24.177772 25.651595 25.526697 Israel 25.470186 26.505992 ... 24.136171 26.162823 26.250385 Turkey 25.658083 27.939119 24.850313 26.262574 26.387854 28.428601
30 Azerbaijan 25.425437 24.327502 25.479647 NaN 25.026843 23.699323 Bulgaria 25.786103 26.953808 ... 25.293971 26.449314 26.751698 Czech Republic 25.758570 26.909401 22.379231 23.369919 27.119829 26.598318
31 Poland 25.554409 26.411950 23.381204 24.813651 26.040918 25.806662 United States 25.816199 26.096768 ... 22.575412 26.468504 25.750779 Israel 25.796180 26.593533 24.526592 23.836121 26.422797 26.743492
32 Greece 25.584299 27.598946 24.797881 24.187697 25.717723 27.208554 Malta 25.869977 26.970343 ... 18.606816 26.166177 26.057609 Romania 25.829464 26.929484 25.108307 25.104478 25.828406 26.965664
33 Romania 25.852072 27.006314 25.225022 24.310374 26.299970 27.489056 Kazakhstan 25.874726 27.861951 ... 28.749849 25.552357 27.596659 United States 26.080620 26.877986 25.938274 24.202293 27.123461 27.043471
34 Slovakia 25.917202 25.579139 25.396825 22.474566 26.063023 25.607957 Austria 26.057217 27.576059 ... 24.105495 27.203673 28.132777 Azerbaijan 26.175314 27.811328 24.682505 25.207910 26.730855 27.932095
35 Hungary 25.930146 27.370762 23.835827 24.749671 26.092200 27.679269 Greece 26.495774 27.561571 ... 25.599192 26.983668 27.576737 Kazakhstan 26.287586 27.791283 25.761994 18.853228 26.786943 27.897029
36 Austria 26.390614 27.639546 25.199774 25.189703 27.176329 27.306218 Azerbaijan 26.969784 28.981581 ... 23.799713 27.317208 28.590093 Cyprus 26.598021 28.108576 25.824110 25.299269 26.749988 28.139008
37 Cyprus 27.849420 28.504249 22.665421 23.928395 28.293720 27.971287 Cyprus 27.033842 28.367226 ... 26.714416 27.159585 28.496170 Greece 26.634915 28.060698 23.896607 25.323358 26.767323 27.855215

38 rows × 21 columns


In [10]:
nonloans[(nonloans.season>1994)&(nonloans.duration>0)].groupby('season')['duration'].describe().unstack()[['min','25%','50%','75%','mean']].plot.line()


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x10956b490>

In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path 
import datetime 
import math
import matplotlib.pyplot as plt

transfers = pd.read_csv('../data/merged/transfers_q.csv')
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')

leagueCodeMapping = {'GB1':'Premier League (England)','FR1':'Ligue 1 (France)',
                                                         'ES1':'La Liga (Spain)','IT1':'Serie A (Italy)','L1':'Bundesliga (Germany)',
                                                         'PO1':'Primeira Liga (Portugal)','NL1':'Eredivisie (Netherlands)'}

teamIdMap = pd.concat([transfers[['toTeamId','toTeamName','season']].rename(columns={'toTeamId':'teamId','toTeamName':'teamName'}),
    transfers[['fromTeamId','fromTeamName','season']].rename(columns={'fromTeamId':'teamId','fromTeamName':'teamName'})],ignore_index=True) \
    .sort_values('season',ascending=False).drop_duplicates('teamId').set_index('teamId')['teamName'].to_dict()
    
agentIdMap = transfers[['playersAgentId','playersAgent']].sort_values('playersAgent',ascending=False).drop_duplicates('playersAgentId').set_index('playersAgentId')['playersAgent'].to_dict()

leagueList = ['GB1','ES1','L1','IT1','FR1','PO1','NL1','TR1','RU1','BE1','MLS1','UKR1','GR1','RO1','ZYP1','KR1','SER1','BOS1','BU1','DK1','SE1','NO1','LI1','SC1','ISR1','PL1','UNG1','A1','C1','MAL1','KAS1','TS1','SLO1','SL1','FI1','LUX1','AZ1','IR1']

countryInfo=pd.read_excel('../data/reference/countries.xls').set_index('country')
countryInfo.marketTier=np.where(countryInfo.marketTier==4,3,countryInfo.marketTier)

top20clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:20].index
top50clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:50].index
top100clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:100].index

uefa=countryInfo[countryInfo.federation=='UEFA'].index
caf=countryInfo[countryInfo.federation=='CAF'].index
concacaf=countryInfo[countryInfo.federation=='CONCACAF'].index
concacaf_no_uscan=countryInfo[countryInfo.federation=='CONCACAF'].drop(['United States','Canada']).index
conmebol=countryInfo[countryInfo.federation=='CONMEBOL'].index
afc=countryInfo[countryInfo.federation=='AFC'].index
afc_no_anzac=countryInfo[countryInfo.federation=='AFC'].drop(['Australia','New Zealand']).index

country_coeff=pd.read_table('../data/reference/country_coefficient.csv').reset_index(drop=True)
country_coeff.columns=['Movement','Country','2012 coeff','2013 coeff','2014 coeff','2015 coeff','2016 coeff','Total coeff','Teams Total','CL','EL','Total']
country_coeff=country_coeff.drop(['Movement','2016 coeff','CL','EL','Total','Teams Total','Total coeff'],axis=1)
country_coeff['Country']=country_coeff['Country'].str.split('\(L',expand=True)[0].str.strip()
country_coeff=country_coeff.set_index('Country')
country_coeff = country_coeff.join(DataFrame({'total coeff':country_coeff.sum(axis=1)}))

natteam_coeff=pd.read_table('../data/reference/national_team_coefficient.csv').reset_index(drop=True).drop('Rank',axis=1)
natteam_coeff['Country']=natteam_coeff['Country'].str.strip()
natteam_coeff.Coefficient=natteam_coeff.Coefficient.apply(lambda s: int(s.replace(',','')))
natteam_coeff=natteam_coeff.rename(columns={'Coefficient':'National team coeff'}).set_index('Country')

club_coeff=pd.read_table('../data/reference/club_coefficient.csv').reset_index(drop=True).set_index('Club')
club_coeff=club_coeff.drop(['2017','2016','Mvmt','Total'],axis=1)
club_coeff.Association = club_coeff.Association.str.split(expand=True)[0]
# sanity check: club_coeff.merge(DataFrame({'Team Name':teamIdMap}),how='left',left_on='Team Id',right_index=True)
club_coeff=club_coeff.reset_index().set_index('Team Id').rename(columns=lambda c: 'Club coeff ' + c.split('–')[0] if '–' in c else c)

gdp=pd.read_table('../data/reference/country_gdp.csv').reset_index(drop=True).drop('2014 Rank',axis=1).rename(columns={'Country ':'Country'})
gdp.Country=gdp.Country.str.strip()
gdp=gdp.set_index('Country')
gdp.columns=['GDP '+c for c in gdp.columns.str.strip()]
for column in gdp.columns:
    gdp[column]=gdp[column].apply(lambda s: float(str(s).replace(',','')))
    
unemployed=pd.read_table('../data/reference/country_unemployment.csv').drop(['Date','Employment rate'],axis=1)
unemployed.columns=['Country','Unemployment']
unemployed['Country']=unemployed['Country'].str.split(' ').apply(lambda arr: ' '.join(arr[:len(arr)/2]))
unemployed=unemployed.set_index('Country').rename(index={'United Kingdom':'England'})

corruption=pd.read_table('../data/reference/country_corruption.csv').drop(['Rank'],axis=1)
corruption.Country=corruption.Country.str.strip()
corruption=corruption.set_index('Country')
corruption=corruption[corruption.columns[:1]].rename(columns={'2015 score':'Corruption'}).rename(index={'United Kingdom':'England'})

population=pd.read_table('../data/reference/country_population.csv').drop(['Rank'],axis=1)
population.columns=population.columns.str.strip()
population.Country=population.Country.str.split('[',expand=True)[0].str.strip()
population['Population']=population['Population'].apply(lambda s: float(str(s).replace(',','')))
population=population[['Country','Population','Growth %']].set_index('Country').rename(index={'United Kingdom':'England'})

barometer=pd.read_table('../data/reference/country_corruption_barometer.csv').reset_index(drop=True)
barometer.columns=['Corruption Rank','Country','Bribe %']
barometer.Country=barometer.Country.str.strip()
barometer=barometer.set_index('Country').rename(index={'United Kingdom':'England'})

countryInfo=natteam_coeff.join(country_coeff).join(gdp).join(unemployed).join(corruption).join(population).join(countryInfo).join(barometer)


/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (0,38,40,41,45,47,49,55) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (45,47) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (40,45,47,51) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/sunkevin/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2871: DtypeWarning: Columns (32,34) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)