In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Helper function to calculate the distance profile of a player
def profile(dataframe, ratio=False):
    """Build a cumulative made-shot curve ordered from the farthest shot to the nearest.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Shot log with a ``SHOT_DIST`` column and a ``SHOT_RESULT`` column
        holding the strings ``'made'`` / ``'missed'``.
    ratio : bool, default False
        NOTE: the flag reads inverted relative to its name and is kept that
        way for backward compatibility. ``False`` (the default) returns the
        cumulative count divided by the number of non-null entries; ``True``
        returns the raw cumulative count.

    Returns
    -------
    tuple
        ``(distances, curve)``: the ``SHOT_DIST`` values sorted in descending
        order and the running total of made shots accumulated in that order
        (normalised or raw, see ``ratio``). Both keep the original row labels.
    """
    # Only the two needed columns are touched; converting the result strings on
    # the whole frame would also rewrite any other column containing them.
    # A stable sort keeps shots at the same distance in their original order,
    # so the curve is reproducible from run to run.
    shots = dataframe[['SHOT_DIST', 'SHOT_RESULT']].sort_values(
        'SHOT_DIST', ascending=False, kind='mergesort'
    )
    made_flags = shots['SHOT_RESULT'].map({'made': 1, 'missed': 0})
    made_cumsum = made_flags.cumsum()
    distances = shots['SHOT_DIST']
    if ratio:
        return (distances, made_cumsum)
    return (distances, made_cumsum / made_cumsum.count())
In [3]:
# Read the NBA shot logs dataset (https://www.kaggle.com/dansbecker/nba-shot-logs)
# from the working directory and preview its last rows.
shot_logs = pd.read_csv(filepath_or_buffer='./shot_logs.csv')
shot_logs.tail(n=5)
Out[3]:
In [4]:
# any(axis=1) flags a row as soon as one of its columns is null, so the sum is
# the number of incomplete rows, not the number of missing cells.
print("Rows with at least one missing value: ")
shot_logs.isnull().any(axis=1).sum()
Out[4]:
In [5]:
# Tally the null entries column by column
print("Missing values per column")
shot_logs.isna().sum()
Out[5]:
In [6]:
# Calculates, for each player, the overall conversion rate: made shots over all
# shots taken (made + missed), rounded to two decimals.
# A single groupby pass replaces re-filtering the whole frame for every player.
shot_outcomes = shot_logs[shot_logs["SHOT_RESULT"].isin(["made", "missed"])]
made_share = (
    shot_outcomes["SHOT_RESULT"]
    .eq("made")
    # sort=False keeps players in order of first appearance, like unique() does
    .groupby(shot_outcomes["player_name"], sort=False)
    # the mean of a boolean column is made / (made + missed)
    .mean()
)
players_ratios = {
    player_name: float("%.2f" % share)
    for player_name, share in made_share.items()
}
In [7]:
# Wrap the per-player ratios in a single-row frame (one column per player, row
# label 'ratio') so a player's overall ratio can be looked up by name.
overall_shot_ratio = pd.DataFrame(data=players_ratios, index=['ratio'])
overall_shot_ratio.loc[:, 'kobe bryant']
Out[7]:
In [8]:
# Now let's do the same thing for 3-point shots: made three-pointers over
# attempted three-pointers, rounded to two decimals.
# The denominator is three-point attempts only, so the value is a conversion
# rate directly comparable with the overall ratio.
three_point_shots = shot_logs[shot_logs["PTS_TYPE"] == 3]
made_share_3 = (
    three_point_shots["SHOT_RESULT"]
    .eq("made")
    .groupby(three_point_shots["player_name"], sort=False)
    .mean()
)
# Every player keeps an entry; players without any three-point attempt get 0.0
# so lookups by name and the histogram never see a missing/NaN value.
players_ratios_3 = {
    player_name: float("%.2f" % made_share_3.get(player_name, 0.0))
    for player_name in shot_logs["player_name"].unique()
}
In [9]:
# Single-row frame (row label 'ratio', one column per player) holding the
# three-point ratios, followed by a lookup for one player.
three_points_shot_ratios = pd.DataFrame(data=players_ratios_3, index=['ratio'])
three_points_shot_ratios.loc[:, 'kobe bryant']
Out[9]:
In [10]:
# Overlay two histograms of the per-player ratios: overall vs 3-pointers
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title("Avg. Shots Made Ratio Histogram")
ax.set_ylabel("Frequency")
ax.set_xlabel("Shots Made Ratio")
# The ratio frames hold one column per player, so the values live in the 'ratio' row
ax.hist(overall_shot_ratio.loc['ratio'], alpha=0.85)
ax.hist(three_points_shot_ratios.loc['ratio'], color='g', alpha=0.85)
ax.grid(axis='y')
ax.legend(['Overall', '3-pointers'])
plt.show()
In [11]:
# Narrow the analysis down to one specific player
lebron_shots = shot_logs.loc[shot_logs['player_name'].eq('lebron james')]
lebron_shots.head(n=5)
Out[11]:
In [12]:
# Plot how his made shots accumulate as a function of shot distance
lebron_profile = profile(lebron_shots)
fig, ax = plt.subplots()
ax.set_title("Distance accuracy profile for L. James")
ax.set_ylabel("Made shots")
ax.set_xlabel("Distance from basket (ft.)")
ax.grid()
ax.plot(*lebron_profile)
plt.show()
In [13]:
# Compare his curve against the profile computed over every shot in the dataset
avg_profile = profile(shot_logs)
fig, ax = plt.subplots()
ax.set_title("Distance accuracy profile for L. James vs Avg. Player")
ax.set_ylabel("Made shots")
ax.set_xlabel("Distance from basket (ft.)")
ax.grid()
# Draw order matters: the legend labels below are matched to lines in this order
for curve in (lebron_profile, avg_profile):
    ax.plot(*curve)
ax.legend(['L. James', 'Avg'])
plt.show()
In [15]:
# Compare a handful of players (the first five names in the data) with the average
fig, ax = plt.subplots(figsize=(10, 7))
ax.set_title("Distance accuracy profile for some players")
ax.set_ylabel("Made shots")
ax.set_xlabel("Distance from basket (ft.)")
ax.grid()
# The dataset-wide curve is dashed so it stands apart from the individual players
ax.plot(*avg_profile, '--')
legends = ['Avg']
for player_name in shot_logs['player_name'].unique()[:5]:
    player_rows = shot_logs[shot_logs['player_name'] == player_name]
    player_profile = profile(player_rows)
    ax.plot(*player_profile, '-')
    legends.append(player_name)
ax.legend(legends)
plt.show()
In [17]:
# All right, Brian Roberts looks above the average profile; let's see how his
# profile behaves under different situations, split by the distance to the
# closest defender (CLOSE_DEF_DIST) into three bands: <= 3.3, (3.3, 6.6], > 6.6.
brian_roberts = shot_logs[shot_logs['player_name'] == 'brian roberts']
defender_dist = brian_roberts['CLOSE_DEF_DIST']
normal_profile = profile(brian_roberts)
close_defender = profile(brian_roberts[defender_dist <= 3.3])
# Both bounds go into a single mask: chaining two boolean selections would apply
# a mask built on the unfiltered frame to an already-filtered one, which pandas
# has to realign (and warns about).
medium_range_defender = profile(brian_roberts[(defender_dist > 3.3) & (defender_dist <= 6.6)])
far_defender = profile(brian_roberts[defender_dist > 6.6])
plt.figure(figsize=(8,5))
# Plot order must match the legend labels given below
plt.plot(*normal_profile)
plt.plot(*close_defender)
plt.plot(*medium_range_defender)
plt.plot(*far_defender)
plt.plot(*avg_profile, '--')
plt.ylabel("Made shots")
plt.xlabel("Distance from basket (ft.)")
plt.grid()
plt.xticks(range(0,50, 5))
plt.legend(['Brian Avg.', 'Close Defender', 'Medium Range Defender', 'Far Defender', 'Avg'])
plt.show()
In [ ]: