In [1]:
import pandas as pd
import requests
from datetime import datetime

Deal with injury data


In [ ]:
# Load the injury log; the first row of the file supplies the column names.
df = pd.read_csv(
    'injury.csv',
    header=0,
)

In [ ]:
# Show the frame loaded from 'injury.csv' as the cell output.
df

In [ ]:
def _pad_single_char(value):
    """Return str(value), prefixed with '0' when it is exactly one character long."""
    text = str(value)
    if len(text) == 1:
        return '0' + text
    return text


# Every 'Date' entry becomes a string; one-character entries gain a leading zero.
df['Date'] = df['Date'].apply(_pad_single_char)

In [ ]:
# Print each distinct 'Content' value that contains 'rest', 'returned' or 'sore'.
# Non-string entries (e.g. NaN for a missing description) are skipped: the
# substring test would raise TypeError on them.
keywords = ('rest', 'returned', 'sore')
for n in df['Content'].unique():
    if isinstance(n, str) and any(word in n for word in keywords):
        print(n)

In [ ]:
# Exact 'Content' values whose rows are removed from the injury frame.
delete = ['sore left foot', 'sore right knee', 'sore left shin', 'rest',
          'general soreness / rest', 'returned to lineup', 'sore right shoulder']

# One boolean mask instead of seven label-based drops: the frame is rebuilt
# only once, and only the matching rows are removed even if index labels repeat.
df = df.loc[~df['Content'].isin(delete)]

In [ ]:
# Write the frame back to 'injury.csv' with 'Date' as the index column.
# `index` is a boolean flag in DataFrame.to_csv; the index name ('Date') is
# already written as the header of the index column, so no extra argument is needed.
df.set_index('Date').to_csv('injury.csv')

In [ ]:
# Drop fully duplicated rows, index by 'Date', and overwrite 'injury.csv'.
(
    df
    .drop_duplicates()
    .set_index('Date')
    .to_csv('injury.csv')
)

Get data from NBA stats


In [ ]:
# HTTP request headers sent with every stats.nba.com request in this notebook.
HEADERS = {}
HEADERS['user-agent'] = ' '.join((
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)',
    'AppleWebKit/537.36 (KHTML, like Gecko)',
    'Chrome/45.0.2454.101 Safari/537.36',
))
HEADERS['referer'] = 'http://stats.nba.com/scores/'

In [ ]:
# Get all Kobe shot data from 1996 to 2016 and put it into a list of dataframes
# This player ID comes from stats.nba.com (http://stats.nba.com/player/#!/977/stats/)
playerID = "977"

# The stats.nba.com endpoint we are using is http://stats.nba.com/stats/shotchartdetail
# More info on endpoints: https://github.com/seemethere/nba_py/wiki/stats.nba.com-Endpoint-Documentation
# Only Season, SeasonType and PlayerID vary between requests.
SHOT_CHART_URL = (
    'http://stats.nba.com/stats/shotchartdetail?Period=0&VsConference=&LeagueID=00'
    '&LastNGames=0&TeamID=0&Position=&Location=&Outcome=&ContextMeasure=FGA'
    '&DateFrom=&StartPeriod=&DateTo=&OpponentTeamID=0&ContextFilter=&RangeType='
    '&Season={season}'
    '&AheadBehind=&EndRange=&VsDivision=&PointDiff=&RookieYear=&GameSegment='
    '&Month=0&ClutchTime=&StartRange=&EndPeriod='
    '&SeasonType={season_type}'
    '&SeasonSegment=&GameID=&PlayerID={player_id}&CFID=&PlayerPosition='
)

# Seconds to wait for the server before giving up instead of hanging the kernel.
REQUEST_TIMEOUT = 30


def fetch_shot_chart(season, season_type, playoffs_flag):
    """Download one season of shot-chart rows for `playerID` as a DataFrame.

    Parameters
    ----------
    season : int
        Starting year of the season, e.g. 1996 for "1996-97".
    season_type : str
        Value of the SeasonType query parameter, already URL-encoded
        ('Regular+Season' or 'Playoffs').
    playoffs_flag : int
        Value stored in the added 'playoffs' column (0 or 1).

    Returns
    -------
    pandas.DataFrame
        The first result set of the response, plus 'SEASON' and 'playoffs' columns.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # The stats.nba.com API wants season as "1996-97"
    seasonString = str(season) + '-' + str(season + 1)[2:]
    shot_chart_url = SHOT_CHART_URL.format(season=seasonString,
                                           season_type=season_type,
                                           player_id=playerID)

    response = requests.get(shot_chart_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error rather than on a confusing JSON/KeyError later.
    response.raise_for_status()

    # Parse the body once, then split it into column headers and rows.
    result_set = response.json()['resultSets'][0]
    shot_df = pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

    # add extra columns for season and playoff flag
    shot_df['SEASON'] = seasonString
    shot_df['playoffs'] = playoffs_flag
    return shot_df


# All regular seasons first, then all playoff runs, in chronological order.
seasons = []
for season_type, playoffs_flag in (('Regular+Season', 0), ('Playoffs', 1)):
    for season in range(1996, 2016):
        seasons.append(fetch_shot_chart(season, season_type, playoffs_flag))

In [ ]:
# Stack every per-season frame (regular season and playoffs) into one frame
# and lower-case all column names.
kobe_all_shots = pd.concat(seasons).rename(columns=str.lower)

# Persist the combined frame as CSV.
kobe_all_shots.to_csv("kobe_all_shots.csv")

Add Score column for each shot


In [ ]:
# Load the shot data, using the first CSV column as the index.
# NOTE(review): the scraping step in this notebook writes 'kobe_all_shots.csv',
# not 'kobe.csv' — confirm how this input file is produced.
df = pd.read_csv(
    'kobe.csv',
    index_col=0,
)

In [ ]:
# Start 'score' as the made-shot flag multiplied by 1.
df['score'] = df['shot_made_flag'].mul(1)

In [ ]:
def _shot_points(shot_type):
    """Return 2 when the shot-type label contains '2', otherwise 3."""
    return 2 if '2' in shot_type else 3


# Points per shot: the made flag times the value of the shot.
# Computed from 'shot_made_flag' rather than from the existing 'score' column
# so that re-running this cell does not multiply the score a second time.
df['score'] = (df['shot_made_flag'] * 1) * df['shot_type'].apply(_shot_points)

In [ ]:
# Save the frame, now including the 'score' column, back to the same file.
df.to_csv(path_or_buf="kobe.csv")

Add opponent column


In [ ]:
# Reload the shot data, using the first CSV column as the index.
data = pd.read_csv(
    'kobe.csv',
    index_col=0,
)

In [ ]:
# Show (rows, columns) of the frame loaded from 'kobe.csv'.
data.shape

In [ ]:
def _blank_if_lal(team):
    """Return '' for 'LAL', otherwise the value unchanged."""
    if team == 'LAL':
        return ''
    return team


# Concatenate the 'htm' and 'vtm' codes after blanking out 'LAL' in each,
# which leaves the non-LAL code(s) for every row.
htm_part = data['htm'].apply(_blank_if_lal)
vtm_part = data['vtm'].apply(_blank_if_lal)
data['opponent'] = htm_part + vtm_part

In [ ]:
# Save the frame, now including the 'opponent' column, back to the same file.
data.to_csv(path_or_buf="kobe.csv")

In [ ]:
# Rebind `data` to the contents of 'stat.csv' (it previously held the shot frame).
data = pd.read_csv(filepath_or_buffer='stat.csv')

In [ ]:
# Print one HTML dropdown anchor per column name of `data`.
item_template = '''<a class="dropdown-item" >{a}</a>'''
for column_name in data.columns:
    print(item_template.format(a=column_name))

Algorithm


In [2]:
# Load 'stat.csv' for the scoring algorithm below.
df = pd.read_csv(filepath_or_buffer='stat.csv')

In [3]:
# stats: keep only the first 20 rows of the loaded table
stat = df.head(20)

In [4]:
# Column-wise median of the numeric columns only. `stat` also carries text
# columns such as 'Season'; without numeric_only=True, pandas >= 2.0 raises
# TypeError when it reaches them.
stat_median = stat.median(numeric_only=True)

In [5]:
# offense: ORB 0.25, FG 0.03125, FGA 0.03125, 3P 0.03125, 3PA 0.03125, 2P 0.03125, 2PA 0.03125, FT 0.03125, FTA 0.03125, AST 0.25, TOV 0.25
stat_off = stat[['Season', 'ORB', 'FG', 'FGA', '3P', '3PA', '2P', '2PA', 'FT', 'FTA', 'AST', 'TOV']]
stat_median_off = stat_median[['ORB', 'FG', 'FGA', '3P', '3PA', '2P', '2PA', 'FT', 'FTA', 'AST', 'TOV']]

# defense: DRB 0.33, STL 0.33, BLK 0.33
stat_def = stat[['Season','DRB', 'STL', 'BLK']]
# 'Season' is text, so it has no median entry; select only the numeric stats,
# matching the offense and efficiency median selections.
stat_median_def = stat_median[['DRB', 'STL', 'BLK']]

# Efficiency: MP 0.5, FG% 0.1, 3P% 0.1, 2P% 0.1, eFG% 0.1, FT% 0.1
# NOTE(review): the selections below do not include '3P%', and the weights
# table in this notebook uses 0.1 (not 0.5) for MP — confirm which is intended.
stat_eff = stat[['Season','MP', 'FG%', '2P%', 'eFG%', 'FT%']]
stat_median_eff = stat_median[['MP', 'FG%', '2P%', 'eFG%', 'FT%']]

In [6]:
# Per-statistic weights used when combining a season's stats into scores.
# Built group by group; insertion order matches the original literal.
# NOTE(review): the five percentage/minutes weights at the end sum to 0.5,
# while the first eleven sum to 1.0 and the DRB/STL/BLK trio sums to 1.0 —
# confirm this is intended.
weights = {'ORB': 0.25}
weights.update(dict.fromkeys(
    ['FG', 'FGA', '3P', '3PA', '2P', '2PA', 'FT', 'FTA'], 0.03125))
weights.update({'AST': 0.25, 'TOV': 0.25})
weights.update(dict.fromkeys(['DRB', 'STL', 'BLK'], 1.0 / 3))
weights.update(dict.fromkeys(['MP', 'FG%', '2P%', 'eFG%', 'FT%'], 0.1))

In [7]:
# Build one row per season with weighted offense / defense / efficiency
# scores, then scale each score column so its largest value is 1.0.
# Reads `stat` (season table) and `weights` (per-column weights) from
# earlier cells.
off_cols = ['ORB', 'FG', 'FGA', '3P', '3PA', '2P', '2PA', 'FT', 'FTA', 'AST', 'TOV']
def_cols = ['DRB', 'STL', 'BLK']
eff_cols = ['MP', 'FG%', '2P%', 'eFG%', 'FT%']
score_cols = ['Off', 'Def', 'Eff']


def weighted_total(frame, columns):
    """Return the row-wise sum of frame[col] * weights[col] over `columns`.

    Terms are added in list order starting from 0, the same order the
    original per-season loop used, so the floating-point results match.
    """
    return sum(frame[col] * weights[col] for col in columns)


# Construct the frame once from whole columns. DataFrame.append was removed
# in pandas 2.0, and appending row by row copied the frame on every iteration.
scores = pd.DataFrame(
    {
        'Season': stat['Season'].values,
        'Off': weighted_total(stat, off_cols).values,
        'Def': weighted_total(stat, def_cols).values,
        'Eff': weighted_total(stat, eff_cols).values,
    },
    columns=['Season'] + score_cols,
)

# Normalise each score column by its own maximum.
scores[score_cols] = scores[score_cols] / scores[score_cols].max()

In [8]:
# Map each season to a one-element list holding its three
# {'axis', 'value'} entries, in the order Offense, Defence, Efficiency.
axis_labels = ('Offense', 'Defence', 'Efficiency')
js_object = {
    row[0]: [[{'axis': label, 'value': value}
              for label, value in zip(axis_labels, row[1:4])]]
    for row in scores.values
}

In [9]:
# Show the season -> [[axis entries]] mapping as the cell output.
js_object


Out[9]:
{'1996-97': [[{'axis': 'Offense', 'value': 0.29328621908127206},
   {'axis': 'Defence', 'value': 0.2558139534883721},
   {'axis': 'Efficiency', 'value': 0.40350235471629103}]],
 '1997-98': [[{'axis': 'Offense', 'value': 0.5147232037691402},
   {'axis': 'Defence', 'value': 0.4069767441860466},
   {'axis': 'Efficiency', 'value': 0.6434776644872205}]],
 '1998-99': [[{'axis': 'Offense', 'value': 0.7084805653710247},
   {'axis': 'Defence', 'value': 0.7674418604651163},
   {'axis': 'Efficiency', 'value': 0.9185679667139134}]],
 '1999-00': [[{'axis': 'Offense', 'value': 0.8133097762073026},
   {'axis': 'Defence', 'value': 0.8372093023255814},
   {'axis': 'Efficiency', 'value': 0.9251063051529423}]],
 '2000-01': [[{'axis': 'Offense', 'value': 0.929328621908127},
   {'axis': 'Defence', 'value': 0.7674418604651163},
   {'axis': 'Efficiency', 'value': 0.98738054958621}]],
 '2001-02': [[{'axis': 'Offense', 'value': 0.8828032979976442},
   {'axis': 'Defence', 'value': 0.6976744186046512},
   {'axis': 'Efficiency', 'value': 0.9273924374742811}]],
 '2002-03': [[{'axis': 'Offense', 'value': 1.0},
   {'axis': 'Defence', 'value': 1.0},
   {'axis': 'Efficiency', 'value': 1.0}]],
 '2003-04': [[{'axis': 'Offense', 'value': 0.8339222614840989},
   {'axis': 'Defence', 'value': 0.6976744186046512},
   {'axis': 'Efficiency', 'value': 0.9103607516803073}]],
 '2004-05': [[{'axis': 'Offense', 'value': 0.9888103651354533},
   {'axis': 'Defence', 'value': 0.7674418604651164},
   {'axis': 'Efficiency', 'value': 0.9808193498239678}]],
 '2005-06': [[{'axis': 'Offense', 'value': 0.9764428739693758},
   {'axis': 'Defence', 'value': 0.7674418604651164},
   {'axis': 'Efficiency', 'value': 0.9892780394129213}]],
 '2006-07': [[{'axis': 'Offense', 'value': 0.9605418138987044},
   {'axis': 'Defence', 'value': 0.7674418604651163},
   {'axis': 'Efficiency', 'value': 0.9860088701934067}]],
 '2007-08': [[{'axis': 'Offense', 'value': 0.9040047114252061},
   {'axis': 'Defence', 'value': 0.8720930232558141},
   {'axis': 'Efficiency', 'value': 0.9417036258058614}]],
 '2008-09': [[{'axis': 'Offense', 'value': 0.8415783274440518},
   {'axis': 'Defence', 'value': 0.7093023255813954},
   {'axis': 'Efficiency', 'value': 0.8783548991815646}]],
 '2009-10': [[{'axis': 'Offense', 'value': 0.8863368669022379},
   {'axis': 'Defence', 'value': 0.7093023255813954},
   {'axis': 'Efficiency', 'value': 0.9382744273238536}]],
 '2010-11': [[{'axis': 'Offense', 'value': 0.8274440518256773},
   {'axis': 'Defence', 'value': 0.627906976744186},
   {'axis': 'Efficiency', 'value': 0.8265054181336018}]],
 '2011-12': [[{'axis': 'Offense', 'value': 0.9057714958775028},
   {'axis': 'Defence', 'value': 0.6744186046511628},
   {'axis': 'Efficiency', 'value': 0.9304787161080883}]],
 '2012-13': [[{'axis': 'Offense', 'value': 0.9334511189634864},
   {'axis': 'Defence', 'value': 0.744186046511628},
   {'axis': 'Efficiency', 'value': 0.9353939005989668}]],
 '2013-14': [[{'axis': 'Offense', 'value': 0.8227326266195523},
   {'axis': 'Defence', 'value': 0.627906976744186},
   {'axis': 'Efficiency', 'value': 0.7251154496822277}]],
 '2014-15': [[{'axis': 'Offense', 'value': 0.8745583038869258},
   {'axis': 'Defence', 'value': 0.744186046511628},
   {'axis': 'Efficiency', 'value': 0.8343925746422204}]],
 '2015-16': [[{'axis': 'Offense', 'value': 0.570082449941107},
   {'axis': 'Defence', 'value': 0.48837209302325585},
   {'axis': 'Efficiency', 'value': 0.6906634355996526}]]}

In [ ]: