In [1]:
# imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import show
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing as pp
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import make_scorer, roc_curve, roc_auc_score
import sklearn.preprocessing as preprocessing
%matplotlib inline
sns.set_context('notebook')
pd.options.mode.chained_assignment = None # default='warn'
pd.set_option('display.max_columns', 500) # to see all columns
In [2]:
data = pd.read_csv('CrowdstormingDataJuly1st.csv')
data_total = data.copy()
print('Shape of the data (dyads, columns):', data.shape)
data.head()
Out[2]:
In [3]:
print('Number of dyads: ', len(data))
print('Number of players: ', len(data.playerShort.unique()))
print('Number of referees: ', len(data.refNum.unique()))
In [4]:
complete = len(data.dropna())
all_ = len(data_total)
print('Number of rows with complete data: {} ({:.3f}%)'.format(complete, complete / all_ * 100))
print('Number of rows with missing data: {} ({:.3f}%)'.format(all_ - complete, (all_ - complete) / all_ * 100))
In [5]:
def find_col_nan(d):
    # return the columns that contain at least one NaN
    return [c for c in d.columns if d[c].isnull().any()]
In [6]:
missing_col = find_col_nan(data)
missing_col
Out[6]:
To clean the data, we will go step by step:
- First of all, we have to remove all dyads that don't have any rating, because those dyads are of no use for our problem.
- Then we will look again at which columns contain missing values and decide how to deal with them.
In [7]:
data = data[~data.rater1.isnull() & ~data.rater2.isnull()]
print('Number of rows with the 2 ratings: {} ({:.3f}%)'.format(len(data), len(data) / len(data_total) * 100))
# rows where exactly one of the two ratings is present (computed on the original data, before filtering)
onlyOne = data_total[data_total.rater1.isnull() ^ data_total.rater2.isnull()]
print('Number of rows with only one rating: {} ({:.3f}%)'.format(len(onlyOne), len(onlyOne) / len(data_total) * 100))
Now check how many incomplete dyads the data still contains.
In [8]:
complete = len(data.dropna())
all_ = len(data)
print("After removing data without rating:")
print("-----------------------------------")
print('Number of rows with complete data: {} ({:.3f}%)'.format(complete, complete / all_ * 100))
print('Number of rows with missing data: {} ({:.3f}%)'.format(all_ - complete, (all_ - complete) / all_ * 100))
Let's check the columns with missing values again.
In [9]:
missing_col = find_col_nan(data)
missing_col
Out[9]:
Only 7.4% of the dyads are incomplete. Let's replace the NaN in height and weight with the median.
In [10]:
# replace missing height and weight with the median value
median_height = data['height'].median()
median_weight = data['weight'].median()
data['height'] = data['height'].fillna(value=median_height)
data['weight'] = data['weight'].fillna(value=median_weight)
In [11]:
complete = len(data.dropna())
all_ = len(data)
print("After removing data without rating:")
print("-----------------------------------")
print('Number of row with complete data: {} ({:.3f}%)'.format(complete, (complete/all_ ) * 100 ))
print('Number of row with missing data: {} ({:.3f}%)'.format(all_-complete, (all_ -complete)/all_ * 100 ))
In [12]:
missing_col = find_col_nan(data)
missing_col
Out[12]:
The incomplete rows now represent only about 7%. We will check whether there exist two dyads with referees from the same country, one with complete data and another with missing IAT and Exp information. In that case, it would be easy to fill in the missing values.
In [13]:
missing_col_test = ['meanIAT', 'nIAT', 'seIAT', 'meanExp', 'nExp', 'seExp']
exist = False
def checkMissingTest(df):
    # df holds all dyads of one referee country; flag the country if a test column
    # is filled in some dyads but missing in others
    global exist
    for col in missing_col_test:
        nbr_dyads = len(df)
        nbr_noNaN = len(df.dropna(subset=[col]))
        if nbr_dyads > nbr_noNaN and nbr_noNaN > 0:
            exist = True
            print('There exist valid data for ', df.Alpha_3.iloc[0])
data.groupby('refCountry').apply(checkMissingTest)
print('Are there 2 dyads from the same country, one with test information and one with missing test values?: ', exist)
Look at how many players don't have a position.
In [14]:
complete = len(data.dropna(subset=['position']))
all_ = len(data)
print("After removing data without rating:")
print("-----------------------------------")
print('Number of row with complete data: {} ({:.3f}%)'.format(complete, (complete/all_ ) * 100 ))
print('Number of row with missing data: {} ({:.3f}%)'.format(all_-complete, (all_ -complete)/all_ * 100 ))
For those players, we will assign a new position called 'Joker'.
In [15]:
data.position = data.position.fillna('Joker')
In [16]:
missing_col = find_col_nan(data)
missing_col
Out[16]:
In [17]:
complete = len(data.dropna())
all_ = len(data)
print("After removing data without rating:")
print("-----------------------------------")
print('Number of row with complete data: {} ({:.3f}%)'.format(complete, (complete/all_ ) * 100 ))
print('Number of row with missing data: {} ({:.3f}%)'.format(all_-complete, (all_ -complete)/all_ * 100 ))
The remaining NaN represent only 0.1% of the rows and concern the referee-country columns and the test results in those countries, which are hard to guess. So we decide to drop the remaining dyads with NaN.
In [18]:
data = data.dropna()
find_col_nan(data)
Out[18]:
Are there players with only one rating?
In [19]:
(data.rater1.isnull() | data.rater2.isnull()).any()
Out[19]:
Are the raters consistent?
In [20]:
def areRaterConsistent(d):
    # a rater is consistent if he gives the same rating to a given player in all of that player's dyads
    for playerID in d.playerShort.unique():
        player = d[d.playerShort == playerID]
        rater1 = player.rater1.unique()
        rater2 = player.rater2.unique()
        if len(rater1) > 1 or len(rater2) > 1:
            return False
    return True
In [21]:
print("Are the rater consistent: ",areRaterConsistent(data))
Possible values of the ratings:
In [22]:
data.rater1.unique()
Out[22]:
Do the raters always agree?
In [23]:
print("percentage of players with different ratings: ", len(data[data['rater1'] != data['rater2']])*100 / len(data), "%")
Let's show some plots to visualize when the raters differ.
In [24]:
len(data)
Out[24]:
In [25]:
fig, ax = plt.subplots(1, 4, figsize=(16, 4))
ax[0].hist([data['rater1'], data['rater2']], bins=5, color=['steelblue', 'seagreen'])
ax[0].set_title("1) Raters compared \n(blue: rater1, green: rater2)")
ax[1].hist(abs(data['rater1'] - data['rater2']), bins=3)
ax[1].set_title("2) Absolute difference |rater1 - rater2|")
disagree_data = data[data['rater1'] != data['rater2']]
ax[2].hist(disagree_data['rater1'], bins=5)
ax[2].set_title("3) Disagreed values, rater1")
ax[3].hist(disagree_data['rater2'], bins=5, color='seagreen')
ax[3].set_title("4) Disagreed values, rater2")
Out[25]:
We can see that rater1 and rater2 disagree the most when they have to rate "white" players.
We can also see in graph 2 that when they disagree, it is only by one category.
Now we will create a new column, color_rating, that will be the label we want to predict. To merge the values of rater1 and rater2 into a single rating, we follow some rules derived from the graphs:
1. If rater1 and rater2 agree, take that value.
2. We can see in graph 4 that when rater2 gives 0, rater1 usually agrees => so when rater2 gives 0, take that value as the skin color.
3. In graph 3, when rater1 gives 1, rater2 usually agrees => when rater1 gives 1, take that value as the color_rating.
4. In graph 3, we can also see that when rater1 gives 0.25, rater2 usually agrees => take rater1's value.
5. Otherwise, choose at random between the two values.
Then we can drop the features rater1 and rater2, since we don't need them anymore.
In [26]:
data['color_rating'] = -1
In [27]:
def color_skin_rules(row):
    # Rule 1
    if row.rater1 == row.rater2:
        return row.rater1
    # Rule 2
    elif row.rater2 == 0:
        return 0
    # Rule 3
    elif row.rater1 == 1:
        return 1
    # Rule 4
    elif row.rater1 == 0.25:
        return 0.25
    # Rule 5
    else:
        return np.random.choice([row.rater1, row.rater2])
data.color_rating = data.apply(color_skin_rules, axis=1)
And now make the color_rating categorical:
In [28]:
categorical_dict = {0: 1, 0.25: 2, 0.5: 3, 0.75: 4, 1: 5}
data['color_rating'] = data['color_rating'].map(categorical_dict).astype('category')
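Since the 1-to-5 scale is ordinal, an alternative (just a sketch, not what is done above) would be to declare the rating as an ordered categorical so the ordering is kept explicitly; `color_rating_ordered` is an illustrative name:
In [ ]:
# alternative (not applied to `data` below): keep the ordinal structure of the rating explicit
color_rating_ordered = pd.Categorical(data['color_rating'], categories=[1, 2, 3, 4, 5], ordered=True)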
In [29]:
data = data.drop(['rater1', 'rater2'], axis=1)
In [30]:
data_cleaned=data.copy()
data_cleaned.to_csv('CrowdstormingDataJuly1st_preprocessed.csv')
data_cleaned.head()
Out[30]:
One solution is to group the data by player name. Then we need to find a way to correctly aggregate the remaining features:
- club: we have to check whether a player appears in 2 different clubs (in case of a transfer during the winter mercato) or whether transfers are not taken into account. (-> one-hot encoding, or the club with the majority of dyads)
- leagueCountry: same as club
- position: check whether a player has several positions -> take the position with the majority of games?
- photoID: drop that information, the id is unique -> not relevant for our classification problem
- refNum: replace with the total number of unique referees
- refCountry: same as refNum
- Alpha_3: remove: redundant information, since it corresponds to refCountry
- meanIAT: make new features (see the small sketch right after this list):
  - take the mean
  - take the weighted mean (weighted by nIAT)
  - take the weighted mean (weighted by the number of games)
- meanExp: same as meanIAT
- seIAT: combine the standard errors (convert back to variances, average them, then take the square root)
- seExp: same as seIAT
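As a quick illustration of the weighted-mean idea, here is a minimal sketch on made-up numbers (the `toy` frame and its values are purely illustrative, not taken from the dataset):
In [ ]:
# toy example: three dyads of one player, with hypothetical meanIAT scores and nIAT sample sizes
toy = pd.DataFrame({'meanIAT': [0.30, 0.35, 0.40], 'nIAT': [100, 300, 600]})
plain_mean = toy.meanIAT.mean()                                  # (0.30 + 0.35 + 0.40) / 3 = 0.35
weighted_mean = (toy.meanIAT * toy.nIAT).sum() / toy.nIAT.sum()  # (30 + 105 + 240) / 1000 = 0.375
print(plain_mean, weighted_mean)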
First, let's do some checks.
In [31]:
clubUnique = True
leagueUnique = True
positionUnique = True
def checkFunction(player):
    global clubUnique, leagueUnique, positionUnique
    # check if the club is unique for one player
    if len(player.club.unique()) > 1:
        clubUnique = False
        print(player.player.iloc[0], 'plays for more than one team: ', player.club.unique())
    # check if the leagueCountry is unique
    if len(player.leagueCountry.unique()) > 1:
        leagueUnique = False
        print(player.player.iloc[0], 'plays for more than one league: ', player.leagueCountry.unique())
    # check if the position is unique
    if len(player.position.unique()) > 1:
        positionUnique = False
        print(player.player.iloc[0], 'plays for more than one position: ', player.position.unique())
data_cleaned.groupby('playerShort').apply(checkFunction)
print("Is the club for a player unique? ", clubUnique)
print("Is the league for a player unique? ", leagueUnique)
print("Is the position for a player unique? ", positionUnique)
Then aggregate
In [32]:
def meanCards(df, test):
    # weighted mean of the test score over a player's dyads, weighted by the number of cards received in each dyad
    cards = df.yellowCards + df.yellowReds + df.redCards
    total_cards = cards.sum()
    if total_cards == 0:
        return 0
    mean_col = df.meanIAT if test == 'IAT' else df.meanExp
    return (mean_col * cards).sum() / total_cards
def aggregation(df):
    first_entry = df.head(1)
    # new aggregation entry
    new_entry = first_entry.copy()
    # sum of the info about the games
    new_entry.games = df.games.sum()
    new_entry.victories = df.victories.sum()
    new_entry.ties = df.ties.sum()
    new_entry.defeats = df.defeats.sum()
    new_entry.goals = df.goals.sum()
    new_entry.yellowCards = df.yellowCards.sum()
    new_entry.yellowReds = df.yellowReds.sum()
    new_entry.redCards = df.redCards.sum()
    # drop photoID and Alpha_3
    new_entry.drop('photoID', inplace=True, axis=1)
    new_entry.drop('Alpha_3', inplace=True, axis=1)
    # refNum: number of unique referees
    new_entry = new_entry.rename(columns={'refNum': 'refCount'})
    new_entry.refCount = len(df.refNum.unique())
    # refCountry: replace by the number of unique countries
    new_entry = new_entry.rename(columns={'refCountry': 'refCountryCount'})
    new_entry.refCountryCount = len(df.refCountry.unique())
    # === Mean of the test results ===
    # - take the mean
    # - take the weighted mean (weighted by nIAT / nExp)
    # - take the weighted mean (weighted by the number of games)
    # - take the weighted mean (weighted by the number of cards)
    new_entry.meanIAT = df.meanIAT.mean()
    new_entry.meanExp = df.meanExp.mean()
    new_entry['meanIAT_nIAT'] = (df.meanIAT * df.nIAT).sum() / df.nIAT.sum()
    new_entry['meanExp_nExp'] = (df.meanExp * df.nExp).sum() / df.nExp.sum()
    new_entry['meanIAT_GameNbr'] = (df.meanIAT * df.games).sum() / df.games.sum()
    new_entry['meanExp_GameNbr'] = (df.meanExp * df.games).sum() / df.games.sum()
    new_entry['meanIAT_cards'] = meanCards(df, test='IAT')
    new_entry['meanExp_cards'] = meanCards(df, test='Exp')
    # open question: keep nIAT / nExp or not? We drop them here since they are folded into the weighted means.
    new_entry.drop('nIAT', inplace=True, axis=1)
    new_entry.drop('nExp', inplace=True, axis=1)
    # standard error = standard deviation / sqrt(n), so standard deviation = standard error * sqrt(n)
    # combine the per-country standard errors: average the variances, take the square root,
    # and divide by the square root of the total number of observations
    varIAT = (df.seIAT * np.sqrt(df.nIAT)) ** 2
    new_entry.seIAT = np.sqrt(np.mean(varIAT)) / np.sqrt(df.nIAT.sum())
    varExp = (df.seExp * np.sqrt(df.nExp)) ** 2
    new_entry.seExp = np.sqrt(np.mean(varExp)) / np.sqrt(df.nExp.sum())
    return new_entry
data_agregated = data_cleaned.groupby("playerShort").apply(aggregation)
data_agregated
Out[32]:
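A quick consistency check one might run on the result (a minimal sketch; it assumes data_agregated and data_cleaned are exactly the frames built above):
In [ ]:
# the aggregation should preserve the total number of games and produce exactly one row per player
assert data_agregated.games.sum() == data_cleaned.games.sum()
assert len(data_agregated) == data_cleaned.playerShort.nunique()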
In [33]:
data_agregated.to_csv('CrowdstormingDataJuly1st_aggregated.csv')
In [34]:
def encode(data_frame, inplace=False):
    """
    Encodes the non-numerical columns with a LabelEncoder.
    Returns a new data_frame if inplace=False, otherwise changes the given data_frame.
    """
    _df = data_frame if inplace else data_frame.copy()
    le = pp.LabelEncoder()  # encoder
    for col_name in _df.columns:
        if _df.dtypes[col_name] == 'O':  # object dtype, i.e. string columns
            _df[col_name] = le.fit_transform(_df[col_name])
            print("encoded", col_name)
    return _df
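Note that a LabelEncoder maps club, leagueCountry and position to arbitrary integers, which implicitly imposes an ordering on them. As mentioned in the aggregation plan above, one-hot encoding is an alternative; here is a minimal sketch (`data_onehot` is an illustrative name and is not used in the rest of the notebook):
In [ ]:
# alternative to label encoding: one-hot encode the nominal columns (not used below)
data_onehot = pd.get_dummies(data_cleaned, columns=['club', 'leagueCountry', 'position'])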
In [35]:
data_cleaned_encoded = encode(data_cleaned, inplace=False)
data_cleaned_encoded.head(1)
Out[35]:
In [36]:
data_cleaned_aggregated_encoded = encode(data_agregated, inplace=False)
data_cleaned_aggregated_encoded.head(1)
Out[36]:
In [37]:
data_cleaned_encoded.to_csv('CrowdstormingDataJuly1st_preprocessed_encoded.csv')
data_cleaned_aggregated_encoded.set_index("playerShort").to_csv('CrowdstormingDataJuly1st_aggregated_encoded.csv')
In [38]:
data_cleaned_aggregated_encoded.set_index("playerShort").head()
Out[38]:
In [ ]: