In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
In [2]:
# Load the per-player, per-season statistics table and clean it in one
# chained expression, so that re-running this cell always rebuilds `seasons`
# from the CSV.
#
# * 'Unnamed: 0' is the name pandas gives to a header-less first column
#   (presumably a saved row index); it carries no statistics.
# * Rows with Tm == 'TOT' are season totals for players who changed teams
#   during the season; the per-team rows for those players are kept, so the
#   totals would double count.
# * NOTE(review): read_csv yields NaN (not 0) for blank cells, so the
#   `Player != 0` comparison does not remove rows with a missing name.
#   Confirm whether `.notna()` was intended.
# * Optional era filter, disabled in the original analysis:
#   seasons = seasons[seasons['Year'] >= 1980]
#
# The trailing .copy() makes `seasons` an independent frame; later cells add
# columns to it, which would otherwise raise SettingWithCopyWarning.
seasons = (
    pd.read_csv('./data/nba/Seasons_Stats.csv')
    .drop(columns='Unnamed: 0')
    .loc[lambda df: (df['Player'] != 0) & (df['Tm'] != 'TOT')]
    .copy()
)
In [3]:
# Points per game for each row: points scored divided by games played.
seasons['PPG'] = seasons['PTS'].div(seasons['G'])
In [4]:
# Preview the first five rows, transposed so each column reads as a line.
seasons.head(5).transpose()
Out[4]:
In [5]:
# Every row ranked by points per game, highest first, showing only the
# year, player and PPG columns.
seasons[['Year', 'Player', 'PPG']].sort_values(by='PPG', ascending=False)
Out[5]:
In [6]:
def get_top_players(agg_func, by='PPG', n=10):
    """Rank players by an aggregate of one statistic across all their rows.

    Reads the module-level `seasons` frame, groups its rows by 'Player',
    reduces the `by` column with `agg_func`, and returns the players with
    the largest aggregated values.

    Only the `by` column is aggregated. Aggregating the whole frame and
    selecting the column afterwards also reduces string columns such as the
    team code, which is wasteful and raises TypeError for 'mean' on
    pandas >= 2.0.

    Parameters
    ----------
    agg_func : str or callable
        Anything accepted by pandas groupby `.agg`, e.g. 'mean' or np.sum.
    by : str, default 'PPG'
        Name of the `seasons` column to aggregate and rank on.
    n : int, default 10
        Number of top rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns ['Player', by], sorted by `by` in descending order, with at
        most `n` rows.
    """
    per_player = (
        seasons.groupby('Player')[by]
        .agg(agg_func)
        .reset_index()
    )
    return per_player.sort_values(by=by, ascending=False).head(n)
In [7]:
# Two leaderboards built with get_top_players:
#   * top_players_ppg_mean - unweighted mean of each player's per-row PPG
#   * top_players_ppg_sum  - sum of each player's PTS (the variable name says
#     "ppg", but the aggregated column is PTS)
# The string aliases 'mean' / 'sum' are passed instead of np.mean / np.sum:
# pandas >= 2.1 emits a FutureWarning when NumPy reducers are given to
# groupby .agg, and the aliases keep the current NaN-skipping behaviour.
top_players_ppg_mean = get_top_players('mean')
top_players_ppg_sum = get_top_players('sum', by='PTS')
In [8]:
# Display the ten players with the highest mean PPG across their rows.
top_players_ppg_mean
Out[8]:
In [9]:
# Display the ten players with the largest summed PTS. Despite the "ppg" in
# its name, this table was built from the PTS column, not PPG.
top_players_ppg_sum
Out[9]:
In [19]:
# Total points per team code across every row of `seasons`, keeping the 20
# highest-scoring codes. Only PTS is summed: summing the whole frame would
# also "sum" the string columns, which is slow and fails or warns on recent
# pandas versions.
top_teams_pts = (
    seasons.groupby('Tm', as_index=False)['PTS']
    .sum()
    .sort_values(by='PTS', ascending=False)
    .head(20)
)

# Bars appear in the frame's row order, i.e. highest total first.
ax = sns.barplot(data=top_teams_pts, x='Tm', y='PTS')
ax.tick_params(axis='x', rotation=45)  # keep 20 team codes legible
ax.set_xlabel('Team')
ax.set_ylabel('Total points')
ax.set_title('Top 20 teams by total points')
Out[19]:
In [11]:
# Per-minute rates: points, assists and total rebounds, each divided by
# minutes played.
seasons['PTSpm'] = seasons['PTS'].div(seasons['MP'])
seasons['ASTpm'] = seasons['AST'].div(seasons['MP'])
seasons['TRBpm'] = seasons['TRB'].div(seasons['MP'])
In [12]:
# normalized by minutes played
seasons.groupby('Age') \
.agg(np.mean)[['PTSpm', 'ASTpm', 'TRBpm']] \
.plot()
# not normalized
seasons.groupby('Age') \
.agg(np.mean)[['PTS', 'AST', 'TRB']] \
.plot()
Out[12]:
In [13]:
# Mean minutes played at each age. Only MP is aggregated; averaging every
# column and selecting MP afterwards fails on string columns with
# pandas >= 2.0.
seasons.groupby('Age')[['MP']].mean().plot(title='Mean minutes played by age')
Out[13]:
In [14]:
# same as above but with regression curve instead of a mean
sns.lmplot(x='Age', y='MP', data=seasons, lowess=True)
Out[14]:
In [16]:
# Points per game by year for every row (faint dots), with three players
# traced on top as lines.
fig, ax = plt.subplots(figsize=(15, 15))
ax.plot(seasons['Year'], seasons['PPG'], 'o',
        alpha=0.1, color='#00736A', label='All')

# Player name exactly as spelled in the data -> extra line-style kwargs.
# An empty dict leaves the colour to matplotlib's default cycle.
highlighted = {
    'Kobe Bryant': {'color': '#FF5A00'},
    'Michael Jordan*': {'color': 'r'},
    'Wilt Chamberlain*': {},
}
for player, style in highlighted.items():
    rows = seasons[seasons['Player'] == player]
    # Each line carries its own label, so the legend cannot drift out of
    # step with the plotting order. The trailing '*' in the data spelling is
    # stripped so all legend names are formatted alike.
    ax.plot(rows['Year'], rows['PPG'], label=player.rstrip('*'), **style)

ax.legend(fontsize=15)
ax.set_xlabel('Year')
ax.set_ylabel('Points Per Game')
Out[16]: