In [1]:
import os
import numpy as np
import pandas as pd
data_folder = os.path.join(os.getcwd(), "data")
data_filename = os.path.join(data_folder, "NBA_2014_games-february.csv")
In [2]:
results = pd.read_csv(data_filename)
results.loc[:5]
Out[2]:
In [3]:
# Don't read the first row, as it is blank, and parse the date column as a date
results = pd.read_csv(data_filename, parse_dates=["Date"], skiprows=[0,])
# Fix the name of the columns
results.columns = ["Date", "Score Type", "Visitor Team", "VisitorPts", "Home Team", "HomePts", "OT", "Notes"]
results.loc[:5]
In [4]:
results["HomeWin"] = results["VisitorPts"] < results["HomePts"]
# Our "class values"
y_true = results["HomeWin"].values
results.loc[:5]
Out[4]:
In [5]:
print("Home Win percentage: {0:.1f}%".format(100 * results["HomeWin"].sum() / results["HomeWin"].count()))
In [6]:
results["HomeLastWin"] = False
results["VisitorLastWin"] = False
# This creates two new columns, all set to False
results.loc[:5]
Out[6]:
In [7]:
# Now compute the actual values for these
# Did the home and visitor teams win their last game?
from collections import defaultdict
won_last = defaultdict(int)
for index, row in results.iterrows():  # Note that this is not efficient
    home_team = row["Home Team"]
    visitor_team = row["Visitor Team"]
    row["HomeLastWin"] = won_last[home_team]
    row["VisitorLastWin"] = won_last[visitor_team]
    results.loc[index] = row
    # Record the current result for each team's next game
    won_last[home_team] = row["HomeWin"]
    won_last[visitor_team] = not row["HomeWin"]
results.loc[20:25]
Out[7]:
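The iterrows() loop above is flagged as not efficient. For larger datasets, the same two features can be built without a Python-level loop. This is a sketch (not from the original notebook) that reshapes the games into one row per team appearance and uses groupby/shift:
In [ ]:
# One row per team appearance, in game order
home_long = pd.DataFrame({"Team": results["Home Team"], "Won": results["HomeWin"]})
home_long["Role"] = "home"
visitor_long = pd.DataFrame({"Team": results["Visitor Team"], "Won": ~results["HomeWin"]})
visitor_long["Role"] = "visitor"
long_form = pd.concat([home_long, visitor_long]).sort_index(kind="mergesort")
# For each team, look up the result of its previous game (False if none)
long_form["PrevWon"] = long_form.groupby("Team")["Won"].shift(1, fill_value=False)
results["HomeLastWin"] = long_form.loc[long_form["Role"] == "home", "PrevWon"].values
results["VisitorLastWin"] = long_form.loc[long_form["Role"] == "visitor", "PrevWon"].values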
In [8]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=14)
In [9]:
from sklearn.model_selection import cross_val_score
# Create a dataset with just the necessary information
X_previouswins = results[["HomeLastWin", "VisitorLastWin"]].values
clf = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(clf, X_previouswins, y_true, scoring='accuracy')
print("Using just the last result from the home and visitor teams")
print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
In [10]:
# What about win streaks?
results["HomeWinStreak"] = 0
results["VisitorWinStreak"] = 0
# How long is each team's current winning streak?
from collections import defaultdict
win_streak = defaultdict(int)
for index, row in results.iterrows():  # Note that this is not efficient
    home_team = row["Home Team"]
    visitor_team = row["Visitor Team"]
    row["HomeWinStreak"] = win_streak[home_team]
    row["VisitorWinStreak"] = win_streak[visitor_team]
    results.loc[index] = row
    # Update the streaks based on the current result
    if row["HomeWin"]:
        win_streak[home_team] += 1
        win_streak[visitor_team] = 0
    else:
        win_streak[home_team] = 0
        win_streak[visitor_team] += 1
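A quick sanity check that the streaks accumulate as expected, mirroring the earlier spot-check of rows 20 to 25:
In [ ]:
results.loc[20:25, ["Home Team", "HomeWinStreak", "Visitor Team", "VisitorWinStreak"]]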
In [11]:
clf = DecisionTreeClassifier(random_state=14)
X_winstreak = results[["HomeLastWin", "VisitorLastWin", "HomeWinStreak", "VisitorWinStreak"]].values
scores = cross_val_score(clf, X_winstreak, y_true, scoring='accuracy')
print("Using whether the home team is ranked higher")
print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
In [12]:
# Let's see which team is better on the ladder, using the previous year's standings
ladder_filename = os.path.join(data_folder, "leagues_NBA_2013_standings_expanded-standings.csv")
ladder = pd.read_csv(ladder_filename, skiprows=[0,1])
ladder
Out[12]:
In [13]:
# We can create a new feature -- HomeTeamRanksHigher
results["HomeTeamRanksHigher"] = 0
for index, row in results.iterrows():
    home_team = row["Home Team"]
    visitor_team = row["Visitor Team"]
    # The Hornets became the Pelicans in 2013, so the previous year's
    # ladder lists them under the old name
    if home_team == "New Orleans Pelicans":
        home_team = "New Orleans Hornets"
    elif visitor_team == "New Orleans Pelicans":
        visitor_team = "New Orleans Hornets"
    home_rank = ladder[ladder["Team"] == home_team]["Rk"].values[0]
    visitor_rank = ladder[ladder["Team"] == visitor_team]["Rk"].values[0]
    row["HomeTeamRanksHigher"] = int(home_rank > visitor_rank)
    results.loc[index] = row
results[:5]
Out[13]:
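The same feature can be computed without the loop by mapping each team name to its rank in the ladder. A sketch with equivalent logic (not from the original notebook), handling the Pelicans/Hornets renaming up front:
In [ ]:
# Series mapping team name -> rank; assumes team names are unique in the ladder
team_ranks = ladder.set_index("Team")["Rk"]
renames = {"New Orleans Pelicans": "New Orleans Hornets"}
home_ranks = results["Home Team"].replace(renames).map(team_ranks)
visitor_ranks = results["Visitor Team"].replace(renames).map(team_ranks)
results["HomeTeamRanksHigher"] = (home_ranks > visitor_ranks).astype(int)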
In [14]:
X_homehigher = results[["HomeLastWin", "VisitorLastWin", "HomeTeamRanksHigher"]].values
clf = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(clf, X_homehigher, y_true, scoring='accuracy')
print("Using whether the home team is ranked higher")
print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
In [15]:
from sklearn.model_selection import GridSearchCV
parameter_space = {
    "max_depth": list(range(1, 21)),
}
clf = DecisionTreeClassifier(random_state=14)
grid = GridSearchCV(clf, parameter_space)
grid.fit(X_homehigher, y_true)
print("Accuracy: {0:.1f}%".format(grid.best_score_ * 100))
In [16]:
# Who won the last match? We ignore home/visitor for this bit
last_match_winner = defaultdict(int)
results["HomeTeamWonLast"] = 0
for index, row in results.iterrows():
    home_team = row["Home Team"]
    visitor_team = row["Visitor Team"]
    teams = tuple(sorted([home_team, visitor_team]))  # Sort for a consistent ordering
    # Record in the row which team won the last encounter between this pair
    row["HomeTeamWonLast"] = 1 if last_match_winner[teams] == row["Home Team"] else 0
    results.loc[index] = row
    # Who won this one?
    winner = row["Home Team"] if row["HomeWin"] else row["Visitor Team"]
    last_match_winner[teams] = winner
results.loc[:5]
Out[16]:
In [17]:
X_home_higher = results[["HomeTeamRanksHigher", "HomeTeamWonLast"]].values
clf = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(clf, X_home_higher, y_true, scoring='accuracy')
print("Using whether the home team is ranked higher")
print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
In [18]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
encoding = LabelEncoder()
encoding.fit(results["Home Team"].values)
home_teams = encoding.transform(results["Home Team"].values)
visitor_teams = encoding.transform(results["Visitor Team"].values)
X_teams = np.vstack([home_teams, visitor_teams]).T
onehot = OneHotEncoder()
X_teams = onehot.fit_transform(X_teams).toarray()
clf = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(clf, X_teams, y_true, scoring='accuracy')
print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
In [19]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(random_state=14)
scores = cross_val_score(clf, X_teams, y_true, scoring='accuracy')
print("Using full team labels is ranked higher")
print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
In [20]:
X_all = np.hstack([X_home_higher, X_teams])
print(X_all.shape)
In [21]:
clf = RandomForestClassifier(random_state=14)
scores = cross_val_score(clf, X_all, y_true, scoring='accuracy')
print("Using whether the home team is ranked higher")
print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
In [22]:
# For reference, the main RandomForestClassifier parameters and their defaults:
# n_estimators=10, criterion='gini', max_depth=None,
# min_samples_split=2, min_samples_leaf=1, max_features='auto',
# max_leaf_nodes=None, bootstrap=True, oob_score=False,
# n_jobs=1, random_state=None, verbose=0
parameter_space = {
    "max_features": [2, 10, 'auto'],
    "n_estimators": [100,],
    "criterion": ["gini", "entropy"],
    "min_samples_leaf": [2, 4, 6],
}
clf = RandomForestClassifier(random_state=14)
grid = GridSearchCV(clf, parameter_space)
grid.fit(X_all, y_true)
print("Accuracy: {0:.1f}%".format(grid.best_score_ * 100))
print(grid.best_estimator_)
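A quick look at where the tuned forest puts its weight. The first two columns of X_all are HomeTeamRanksHigher and HomeTeamWonLast; the rest are the one-hot team indicators. A sketch (GridSearchCV refits the best estimator on the full data by default):
In [ ]:
importances = grid.best_estimator_.feature_importances_
print(importances[:2])        # the two hand-crafted features
print(importances[2:].sum())  # combined weight of the team indicators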
In [ ]: