In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3
import numpy as np
from numpy import random
database = 'database.sqlite'
conn = sqlite3.connect(database)

In [2]:
query = """select * from match where home_player_X1 is not null and 'home_player_Y1' is not null 
and  home_player_X2 is not null and 'home_player_Y2' is not null 
and  home_player_X3 is not null and home_player_Y3 is not null 
and  home_player_X4 is not null and home_player_Y4 is not null 
and  home_player_X5 is not null and home_player_Y5 is not null 
and  home_player_X6 is not null and home_player_Y6 is not null 
and  home_player_X7 is not null and home_player_Y7 is not null 
and  home_player_X8 is not null and home_player_Y8 is not null 
and  home_player_X9 is not null and home_player_Y9 is not null 
and  home_player_X10 is not null and home_player_Y10 is not null 
and  home_player_X11 is not null and home_player_Y11 is not null  
and  away_player_X1 is not null and away_player_Y1 is not null 
and  away_player_X2 is not null and away_player_Y2 is not null 
and  away_player_X3 is not null and away_player_Y3 is not null 
and  away_player_X4 is not null and away_player_Y4 is not null 
and  away_player_X5 is not null and away_player_Y5 is not null 
and  away_player_X6 is not null and away_player_Y6 is not null 
and  away_player_X7 is not null and away_player_Y7 is not null 
and  away_player_X8 is not null and away_player_Y8 is not null 
and  away_player_X9 is not null and away_player_Y9 is not null 
and  away_player_X10 is not null and away_player_Y10 is not null 
and  away_player_X11 is not null and away_player_Y11 is not null 
and home_team_goal is not null and away_team_goal is not null
and home_player_1 is not null 
and home_player_2 is not null  
and home_player_3 is not null  
and home_player_4 is not null  
and home_player_5 is not null  
and home_player_6 is not null  
and home_player_7 is not null  
and home_player_8 is not null 
and home_player_9 is not null  
and home_player_10 is not null  
and home_player_11 is not null  
and away_player_1 is not null  
and away_player_2 is not null  
and away_player_3 is not null  
and away_player_4 is not null  
and away_player_5 is not null  
and away_player_6 is not null  
and away_player_7 is not null  
and away_player_8 is not null  
and away_player_9 is not null  
and away_player_10 is not null  
and away_player_11 is not null  
and  B365H is not null and B365D is not null and B365A is not null;"""
matches = pd.read_sql(query, conn)
drop = matches.columns.values[-27:-1]
#Removing other betting houses odds
matches = matches.drop(drop,1)
matches= matches.drop('BSA',1)
#Raw features
matches= matches.drop(['goal', 'shoton', 'shotoff', 'foulcommit', 'card',
       'cross', 'corner', 'possession'],1)
matches.columns.values


Out[2]:
array(['id', 'country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_X1',
       'home_player_X2', 'home_player_X3', 'home_player_X4',
       'home_player_X5', 'home_player_X6', 'home_player_X7',
       'home_player_X8', 'home_player_X9', 'home_player_X10',
       'home_player_X11', 'away_player_X1', 'away_player_X2',
       'away_player_X3', 'away_player_X4', 'away_player_X5',
       'away_player_X6', 'away_player_X7', 'away_player_X8',
       'away_player_X9', 'away_player_X10', 'away_player_X11',
       'home_player_Y1', 'home_player_Y2', 'home_player_Y3',
       'home_player_Y4', 'home_player_Y5', 'home_player_Y6',
       'home_player_Y7', 'home_player_Y8', 'home_player_Y9',
       'home_player_Y10', 'home_player_Y11', 'away_player_Y1',
       'away_player_Y2', 'away_player_Y3', 'away_player_Y4',
       'away_player_Y5', 'away_player_Y6', 'away_player_Y7',
       'away_player_Y8', 'away_player_Y9', 'away_player_Y10',
       'away_player_Y11', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10',
       'away_player_11', 'B365H', 'B365D', 'B365A'], dtype=object)

In [3]:
#Transforming date column into a date type
#matches['date']=pd.to_datetime(matches['date'], format='%Y-%m-%d %H:%M:%S.%f')
#
#matches['date']=matches['date']-matches['date'].unique()[0]
#matches['date']=matches['date'].astype('timedelta64[D]')

#from sklearn import preprocessing 
#le = preprocessing.LabelEncoder()
#matches['season'] = le.fit_transform(matches['season'].astype('str'))

In [4]:
import re
def def_formations(matches_positions):
    #pos=matches_positions.to_dataframe()
    form=[]
    for index,row in matches_positions.iterrows():
        b= row.values
        dfs = (b <= 3).sum()
        mid1 = ((b >= 4) & (b<=6)).sum()
        mid2 = ((b >= 7) & (b<=9)).sum()
        atk1 = ((b >= 10)).sum()
        formation="%d-%d-%d-%d"%(dfs,mid1,mid2,atk1)
        formation = re.sub('0-','',formation)
        form.append(formation)
    return form

In [5]:
positions_home= matches[['home_player_Y2',
                     'home_player_Y3',
                     'home_player_Y4',
                     'home_player_Y5',
                     'home_player_Y6',
                     'home_player_Y7',
                     'home_player_Y8',
                     'home_player_Y9',
                     'home_player_Y10',
                     'home_player_Y11']]
positions_away = matches[['away_player_Y2',
                     'away_player_Y3',
                     'away_player_Y4',
                     'away_player_Y5',
                     'away_player_Y6',
                     'away_player_Y7',
                     'away_player_Y8',
                     'away_player_Y9',
                     'away_player_Y10',
                     'away_player_Y11']]
formation_home=def_formations(positions_home)
formation_away=def_formations(positions_away)

matches['formation_h']=formation_home
matches['formation_a']=formation_away

matches['formation_a'].unique()


Out[5]:
array(['4-4-2', '4-5-1', '5-3-2', '4-3-3', '3-4-3', '3-5-2', '3-3-3-1',
       '4-2-1-3', '5-4-1', '4-2-3-1', '4-1-4-1', '4-3-2-1', '4-1-3-2',
       '4-2-2-2', '4-1-2-3', '4-3-1-2', '3-6-1', '3-1-4-2', '3-2-3-2'], dtype=object)

In [6]:
matches= matches.drop(['home_player_X1', 'home_player_X2', 'home_player_X3',
       'home_player_X4', 'home_player_X5', 'home_player_X6',
       'home_player_X7', 'home_player_X8', 'home_player_X9',
       'home_player_X10', 'home_player_X11', 'away_player_X1',
       'away_player_X2', 'away_player_X3', 'away_player_X4',
       'away_player_X5', 'away_player_X6', 'away_player_X7',
       'away_player_X8', 'away_player_X9', 'away_player_X10',
       'away_player_X11', 'home_player_Y1', 'home_player_Y2',
       'home_player_Y3', 'home_player_Y4', 'home_player_Y5',
       'home_player_Y6', 'home_player_Y7', 'home_player_Y8',
       'home_player_Y9', 'home_player_Y10', 'home_player_Y11',
       'away_player_Y1', 'away_player_Y2', 'away_player_Y3',
       'away_player_Y4', 'away_player_Y5', 'away_player_Y6',
       'away_player_Y7', 'away_player_Y8', 'away_player_Y9',
       'away_player_Y10', 'away_player_Y11'],1)

In [7]:
def determine_home_result(match):
    if match['home_team_goal'] > match['away_team_goal']:
        return 'H'
    elif match['home_team_goal'] < match['away_team_goal']:
        return 'A'
    else:
        return 'D'
    
    
matches['Output']=matches.apply(determine_home_result, axis=1)

In [8]:
matches.columns.values


Out[8]:
array(['id', 'country_id', 'league_id', 'season', 'stage', 'date',
       'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_1',
       'home_player_2', 'home_player_3', 'home_player_4', 'home_player_5',
       'home_player_6', 'home_player_7', 'home_player_8', 'home_player_9',
       'home_player_10', 'home_player_11', 'away_player_1',
       'away_player_2', 'away_player_3', 'away_player_4', 'away_player_5',
       'away_player_6', 'away_player_7', 'away_player_8', 'away_player_9',
       'away_player_10', 'away_player_11', 'B365H', 'B365D', 'B365A',
       'formation_h', 'formation_a', 'Output'], dtype=object)

In [9]:
data=matches
data = data[data['league_id']!= 0]
data = data[data['season']!= '2008/2009']
data[data['league_id'] == 1]['season'].unique()
data.sort_values(["country_id","league_id","season","stage"])


Out[9]:
id country_id league_id season stage date match_api_id home_team_api_id away_team_api_id home_team_goal ... away_player_8 away_player_9 away_player_10 away_player_11 B365H B365D B365A formation_h formation_a Output
18 309 1 1 2009/2010 1 2009-08-02 00:00:00 665321 9984 9991 1 ... 26916 37065 12574 46335 2.88 3.30 2.25 4-4-2 4-4-2 A
19 310 1 1 2009/2010 1 2009-08-01 00:00:00 665322 9994 10000 1 ... 37981 131531 75500 130027 2.30 3.30 2.88 4-4-2 4-4-2 D
20 311 1 1 2009/2010 1 2009-08-01 00:00:00 665323 8571 8635 0 ... 38393 38253 37069 46552 5.75 3.60 1.57 4-4-2 4-4-2 A
52 384 1 1 2009/2010 2 2009-08-09 00:00:00 665326 8342 9994 2 ... 95609 15662 30485 104404 1.50 3.60 7.00 4-4-2 4-4-2 H
53 385 1 1 2009/2010 2 2009-08-08 00:00:00 665327 8635 9984 3 ... 37909 104386 78462 89548 1.30 5.00 10.00 4-4-2 4-4-2 H
54 387 1 1 2009/2010 2 2009-08-08 00:00:00 665329 10000 9987 2 ... 43158 42153 169200 163674 2.20 3.30 3.25 4-4-2 4-4-2 D
55 388 1 1 2009/2010 2 2009-08-09 00:00:00 665330 9991 8203 2 ... 37112 178291 67952 17276 1.44 3.75 7.50 4-4-2 4-4-2 H
56 389 1 1 2009/2010 2 2009-08-08 00:00:00 665332 10001 8571 1 ... 38257 40521 39772 95615 1.75 3.50 4.75 4-4-2 4-4-2 D
57 390 1 1 2009/2010 2 2009-08-07 00:00:00 665333 9993 9985 1 ... 156551 25957 38369 178484 3.60 3.30 2.00 4-4-2 4-4-2 D
96 461 1 1 2009/2010 3 2009-08-15 00:00:00 665334 8635 10001 3 ... 38332 47410 25619 166584 1.29 5.50 10.00 4-4-2 4-4-2 H
97 462 1 1 2009/2010 3 2009-08-15 00:00:00 665335 9987 9991 1 ... 32760 39875 37065 12574 2.50 3.40 2.70 4-4-2 4-4-2 D
98 464 1 1 2009/2010 3 2009-08-15 00:00:00 665337 9984 10000 2 ... 131531 130027 104377 38231 2.40 3.40 2.80 4-4-2 4-4-2 D
99 466 1 1 2009/2010 3 2009-08-15 00:00:00 665340 8203 8342 2 ... 37979 38336 38366 38440 2.90 3.30 2.40 4-4-2 4-4-2 H
104 475 1 1 2009/2010 4 2009-08-23 00:00:00 665342 8342 8571 2 ... 40521 39772 78902 95615 1.33 4.50 8.50 4-4-2 4-4-2 D
105 478 1 1 2009/2010 4 2009-08-22 00:00:00 665345 10000 8635 0 ... 38383 38393 46552 12692 3.75 3.25 1.91 4-4-2 4-4-2 A
106 479 1 1 2009/2010 4 2009-08-22 00:00:00 665346 9991 9994 4 ... 95609 15662 30485 110140 1.50 3.80 6.50 4-4-2 4-4-2 H
107 481 1 1 2009/2010 4 2009-08-22 00:00:00 665349 9993 8203 1 ... 38969 178291 148286 17276 1.85 3.40 4.00 4-4-2 4-4-2 A
108 482 1 1 2009/2010 5 2009-08-30 00:00:00 665352 8635 9985 1 ... 25957 39631 39591 38369 2.10 3.25 3.30 4-4-2 4-4-2 D
109 483 1 1 2009/2010 5 2009-08-30 00:00:00 665353 9987 9993 1 ... 36868 38786 30910 33622 1.75 3.50 4.33 4-4-2 4-4-2 D
110 484 1 1 2009/2010 5 2009-08-30 00:00:00 665354 9984 8342 2 ... 38366 52280 27423 38440 3.00 3.25 2.25 4-4-2 4-4-2 A
111 485 1 1 2009/2010 5 2009-08-29 00:00:00 665357 8571 9991 1 ... 166618 39875 12574 46335 4.20 3.40 1.80 4-4-2 4-4-2 H
112 488 1 1 2009/2010 5 2009-08-28 00:00:00 665360 10001 10000 1 ... 75500 130027 104377 38231 2.50 3.25 2.80 4-4-2 4-4-2 A
113 489 1 1 2009/2010 6 2009-09-13 00:00:00 665361 8342 9987 1 ... 43158 32863 42153 38794 1.83 3.50 4.33 4-4-2 4-4-2 D
114 490 1 1 2009/2010 6 2009-09-12 00:00:00 665362 9985 8203 3 ... 38969 178291 148286 17276 1.36 4.75 8.50 4-4-2 4-4-2 H
115 492 1 1 2009/2010 6 2009-09-12 00:00:00 665364 10000 8571 0 ... 38257 40521 39772 78902 1.67 3.75 5.00 4-4-2 4-4-2 A
116 494 1 1 2009/2010 6 2009-09-12 00:00:00 665366 9991 9999 5 ... 33671 148336 24037 23997 1.29 5.25 11.00 4-4-2 4-4-2 H
117 495 1 1 2009/2010 6 2009-09-13 00:00:00 665368 9993 9994 2 ... 95609 15662 30485 69629 1.85 3.40 4.33 4-4-2 4-4-2 H
118 496 1 1 2009/2010 7 2009-09-20 00:00:00 665370 8635 9991 1 ... 166618 38233 12574 46335 1.57 3.60 5.50 4-4-2 4-4-2 D
119 498 1 1 2009/2010 7 2009-09-19 00:00:00 665374 9984 9993 1 ... 38784 38786 33622 178284 2.25 3.30 2.88 4-4-2 4-4-2 A
120 499 1 1 2009/2010 7 2009-09-19 00:00:00 665376 9994 9985 1 ... 148335 156551 178484 38369 5.00 3.40 1.67 4-4-2 4-4-2 A
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
19599 24466 21518 21518 2015/2016 35 2016-04-24 00:00:00 2030506 8581 8315 2 ... 604105 33871 179772 102623 3.40 3.20 2.25 4-4-2 4-2-3-1 D
19600 24467 21518 21518 2015/2016 35 2016-04-23 00:00:00 2030507 9906 9864 1 ... 238841 432591 213653 75445 1.33 5.00 11.00 4-4-2 4-4-2 H
19601 24468 21518 21518 2015/2016 36 2016-04-30 00:00:00 2030508 8560 8633 0 ... 164684 635772 31921 359193 5.00 4.00 1.67 4-2-3-1 4-3-3 A
19602 24469 21518 21518 2015/2016 36 2016-05-01 00:00:00 2030509 10267 10205 0 ... 675088 562062 37506 45744 2.20 3.30 3.40 4-3-3 4-4-2 A
19603 24470 21518 21518 2015/2016 36 2016-05-01 00:00:00 2030510 9783 8305 0 ... 75192 240054 262592 213489 2.30 3.30 3.20 4-4-2 4-2-3-1 A
19604 24471 21518 21518 2015/2016 36 2016-04-29 00:00:00 2030511 9869 8372 2 ... 244563 40501 177475 193224 1.60 3.80 5.75 4-2-3-1 4-2-3-1 H
19605 24472 21518 21518 2015/2016 36 2016-04-30 00:00:00 2030512 8603 8634 0 ... 25773 30981 40636 19533 21.00 9.00 1.13 4-2-3-1 4-3-3 A
19606 24473 21518 21518 2015/2016 36 2016-05-01 00:00:00 2030513 8558 8302 1 ... 151063 111237 640725 33030 2.30 3.30 3.10 4-3-3 4-2-3-1 H
19607 24474 21518 21518 2015/2016 36 2016-04-30 00:00:00 2030514 7878 8306 3 ... 105925 213729 150872 190669 1.60 3.80 6.00 4-2-3-1 4-1-4-1 H
19608 24475 21518 21518 2015/2016 36 2016-05-01 00:00:00 2030515 8315 9910 2 ... 49836 246177 213486 174472 1.85 3.60 4.33 4-2-3-1 4-2-3-1 H
19609 24476 21518 21518 2015/2016 36 2016-05-02 00:00:00 2030516 9864 8581 3 ... 364244 477472 25462 38699 2.20 3.20 3.50 4-4-2 4-4-2 H
19610 24477 21518 21518 2015/2016 36 2016-04-30 00:00:00 2030517 9906 8370 1 ... 482331 150770 210065 46808 1.33 5.00 10.00 4-4-2 4-2-3-1 H
19611 24478 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030518 8560 8370 2 ... 482331 2802 210065 150770 3.50 3.60 2.05 4-2-3-1 4-2-3-1 H
19612 24479 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030519 8633 10267 3 ... 75307 40148 193869 241825 1.20 7.50 11.00 4-3-3 4-1-4-1 H
19613 24480 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030520 10205 9783 0 ... 151079 279173 194660 198566 2.45 3.10 3.10 4-4-2 4-4-2 A
19614 24481 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030521 8305 9869 1 ... 17299 395154 541557 474448 2.10 3.60 3.40 4-2-3-1 4-2-3-1 D
19615 24482 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030522 8372 8603 1 ... 193226 37824 722766 96652 2.10 3.60 3.40 4-2-3-1 4-2-3-1 D
19616 24483 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030523 8634 8558 5 ... 107930 88986 498033 41622 1.04 17.00 41.00 4-3-3 4-1-4-1 H
19617 24484 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030524 8302 7878 1 ... 495841 213711 213702 161291 2.88 3.75 2.30 4-2-3-1 4-2-3-1 A
19618 24485 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030525 8306 8315 0 ... 604105 33871 179772 33028 4.00 3.60 1.91 4-1-4-1 4-2-3-1 D
19619 24486 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030526 9910 9864 1 ... 238841 56686 75445 213653 1.50 4.20 7.00 4-2-3-1 4-4-2 H
19620 24487 21518 21518 2015/2016 37 2016-05-08 00:00:00 2030527 8581 9906 2 ... 72623 309334 432950 30853 17.00 6.50 1.20 4-2-3-1 4-4-2 H
19621 24488 21518 21518 2015/2016 38 2016-05-13 00:00:00 2030528 10267 8560 0 ... 33973 678234 99047 636565 1.95 3.75 3.75 4-2-3-1 4-2-3-1 A
19622 24489 21518 21518 2015/2016 38 2016-05-14 00:00:00 2030529 9783 8633 0 ... 95078 31921 26166 30893 13.00 8.00 1.18 4-4-2 4-3-3 A
19623 24491 21518 21518 2015/2016 38 2016-05-15 00:00:00 2030531 8603 8305 2 ... 75192 240054 262592 183548 5.00 3.80 1.70 4-4-2 4-2-3-1 H
19624 24492 21518 21518 2015/2016 38 2016-05-15 00:00:00 2030532 8558 8372 4 ... 40501 202553 193224 200917 2.00 3.60 3.70 4-1-4-1 4-4-2 H
19625 24493 21518 21518 2015/2016 38 2016-05-14 00:00:00 2030533 7878 8634 0 ... 30955 30981 40636 19533 26.00 11.00 1.08 4-2-3-1 4-3-3 A
19626 24495 21518 21518 2015/2016 38 2016-05-15 00:00:00 2030535 9864 8306 4 ... 484906 213729 37510 179083 1.80 3.75 4.50 4-4-2 4-1-4-1 H
19627 24496 21518 21518 2015/2016 38 2016-05-14 00:00:00 2030536 9906 9910 2 ... 49836 246177 213486 130298 1.75 3.75 4.50 4-4-2 4-2-3-1 H
19628 24497 21518 21518 2015/2016 38 2016-05-15 00:00:00 2030537 8370 8581 3 ... 150632 384413 466672 25462 1.33 5.25 9.00 4-2-3-1 4-2-3-1 H

17962 rows × 39 columns


In [20]:
def tabela_mandante(stageI, stageF, league_id, season, team_id):
    sql = """select team, count(case when gf > gs then 1 end) home_V,
    count(case when gs > gf then 1 end) home_D,
    count(case when gf = gs then 1 end) home_E,
    sum(gf) home_GF, avg(gf) as home_AVG_GF ,  sum(gs) as home_GS  , avg(gs) as home_AVG_GS from 
    (select home_team_api_id team, home_team_goal gf, away_team_goal gs from Match 
    where (stage between """+str(stageI)+""" and """+str(stageF)+""" ) and (season='"""+str(season)+"""') and league_id ="""+str(league_id)+"""  and home_team_api_id="""+str(team_id)+""") a 
    group by team order by home_V desc , home_GF-home_GS desc, home_GF desc;"""

    return pd.read_sql(sql, conn)

def tabela_visitante(stageI, stageF, league_id, season, team_id):
    sql = """select team as team, count(case when gf > gs then 1 end) away_V,
    count(case when gs > gf then 1 end) away_D,
    count(case when gf = gs then 1 end) away_E,
    sum(gf) away_GF, avg(gf) as away_AVG_GF ,  sum(gs) as away_GS  , avg(gs) as away_AVG_GS from 
    (select away_team_api_id team, away_team_goal gf, home_team_goal gs from Match 
    where (stage between """+str(stageI)+""" and """+str(stageF)+""" ) and (season='"""+str(season)+"""') and league_id ="""+str(league_id)+"""  and away_team_api_id="""+str(team_id)+""") a 
    group by team order by away_V desc , away_GF-away_GS desc, away_GF desc;"""
    return pd.read_sql(sql, conn)


def tabela(stageI, stageF, league_id, season, team_id,mando):
    sql= """select team , count(case when gf > gs then 1 end) """+mando+"""_VG,
    count(case when gs > gf then 1 end) """+mando+"""_DG,
    count(case when gf = gs then 1 end) """+mando+"""_EG,
    sum(gf) """+mando+"""_GFG, avg(gf) as """+mando+"""_AVG_GFG ,  sum(gs) as """+mando+"""_GSG  , avg(gs) as """+mando+"""_AVG_GSG  from 
    (select away_team_api_id as team, home_team_goal as  gs , away_team_goal as gf  from Match 
    where (stage between """+str(stageI)+""" and """+str(stageF)+""" ) and (season='"""+str(season)+"""') and league_id ="""+str(league_id)+""" and away_team_api_id="""+str(team_id)+"""
    union all select home_team_api_id as team,  away_team_goal as gs, home_team_goal as gf from Match 
    where (stage between """+str(stageI)+""" and """+str(stageF)+""" ) and (season='"""+str(season)+"""') and league_id ="""+str(league_id)+""" and home_team_api_id="""+str(team_id)+""") a group by team order by """+mando+"""_VG desc , """+mando+"""_GFG-"""+mando+"""_GSG desc, """+mando+"""_GFG desc;"""
    return pd.read_sql(sql, conn)

def team_attribute(team_id,seasonI,seasonF):
    sql=""" select buildUpPlaySpeed,buildUpPlaySpeedClass,buildUpPlayDribblingClass,buildUpPlayPassing,
        buildUpPlayPassingClass,buildUpPlayPositioningClass,chanceCreationPassing,chanceCreationPassingClass,
        chanceCreationCrossing,chanceCreationCrossingClass,chanceCreationShooting,chanceCreationShootingClass,
        chanceCreationPositioningClass,defencePressure,defencePressureClass,defenceAggression,
        defenceAggressionClass,defenceTeamWidth,defenceTeamWidthClass,defenceDefenderLineClass
        from Team_Attributes where team_api_id ="""+str(team_id)+""" and date between '"""+str(seasonI)+"""' and '"""+str(seasonF)+"""' ;"""
    return pd.read_sql(sql, conn)

def players_hw(player1,player2,player3,player4,player5,player6,player7,player8,player9,player10,player11):
    sql="""select * from Player where player_api_id = """+str(player1)+""" or player_api_id="""+str(player2)+""" 
    or player_api_id="""+str(player3)+""" or player_api_id="""+str(player4)+""" or player_api_id="""+str(player5)+"""
    or player_api_id="""+str(player6)+""" or player_api_id="""+str(player7)+""" or player_api_id="""+str(player8)+"""
    or player_api_id="""+str(player9)+""" or player_api_id="""+str(player10)+""" or player_api_id="""+str(player11)+""";"""
    return pd.read_sql(sql, conn)

In [19]:
data_final = pd.DataFrame()
delta_stage=5

datar = data[data['league_id']==1729]
datar = datar[datar['season']=='2012/2013']
datar = datar[datar['stage']>5]
datar = datar[datar['stage']<7]
#datar.apply(lambda row:tabela_mandante(row['stage'],row['stage']+delta_stage,row['league_id'],row['season'],row['home_team_api_id']))
for index, row in datar.iterrows():
    
    if row['stage']>5:
        #Pega os dados temporal e local do mandante    
        local_mandante = tabela_mandante(row['stage'],row['stage']+delta_stage,row['league_id'],row['season'],row['home_team_api_id'])
        global_mandante = tabela(row['stage'],row['stage']+delta_stage,row['league_id'],row['season'],row['home_team_api_id'],'home')
        mandante = pd.concat([local_mandante,global_mandante],axis=1)
        mandante = mandante.drop('team',axis=1)
        local_visitante = tabela_visitante(row['stage'],row['stage']+delta_stage,row['league_id'],row['season'],row['home_team_api_id'])
        global_visitante = tabela(row['stage'],row['stage']+delta_stage,row['league_id'],row['season'],row['home_team_api_id'],'away')
        visitante = pd.concat([local_visitante,global_visitante],axis=1)
        visitante = visitante.drop('team',axis=1)
        res = pd.concat([mandante,visitante],axis=1)
        #Pegando os dados sobre estilo de cada equipe que o FIFA classifica
        seasonI,seasonF=str(row['season']).split('/')
        home_team_attributes = team_attribute(row['home_team_api_id'],seasonI,seasonF)
        home_team_attributes.columns = ["h_"+ str(col) for col in home_team_attributes.columns]
        away_team_attributes = team_attribute(row['away_team_api_id'],seasonI,seasonF)
        away_team_attributes.columns = ["a_"+ str(col) for col in away_team_attributes.columns]
        team_attributes = pd.concat([home_team_attributes,away_team_attributes],axis=1)
        res = pd.concat([res,team_attributes],axis=1)
        #pegando os dados altura e peso
        home_hw = players_hw(row['home_player_1'],row['home_player_2'],row['home_player_3'],row['home_player_4'],row['home_player_5'],row['home_player_6'],row['home_player_7'],row['home_player_8'],row['home_player_9'],row['home_player_10'],row['home_player_11'])
        away_hw = players_hw(row['away_player_1'],row['away_player_2'],row['away_player_3'],row['away_player_4'],row['away_player_5'],row['away_player_6'],row['away_player_7'],row['away_player_8'],row['away_player_9'],row['away_player_10'],row['away_player_11'])
        hw = pd.concat([home_hw,away_hw],axis=1)
        res = pd.concat([res,hw],axis=1)
        
        #Junta os dados do jogo atual com os dados gerados pelas funções anteriores.
        for field in row.index:
            res[field] = row[field]
        data_final = data_final.append(res)

print data_final.columns


Index([u'home_V', u'home_D', u'home_E', u'home_GF', u'home_AVG_GF', u'home_GS',
       u'home_AVG_GS', u'home_VG', u'home_DG', u'home_EG',
       ...
       u'away_player_8', u'away_player_9', u'away_player_10',
       u'away_player_11', u'B365H', u'B365D', u'B365A', u'formation_h',
       u'formation_a', u'Output'],
      dtype='object', length=107)

In [ ]: