In [3]:
import numpy as np
import pandas as pd
import requests
import keras
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import random
import calendar
%matplotlib inline
In [6]:
#http://danielfrg.com/blog/2013/04/01/nba-scraping-data/
# Build the `teams` DataFrame: one row per team link found on the ESPN teams
# page, indexed by the link text, with the link's href and its last two
# '/'-separated path segments as columns.
url = 'http://espn.go.com/nba/teams'
# Bound the wait so a stalled connection cannot hang the cell indefinitely.
r = requests.get(url, timeout=30)
soup = BeautifulSoup(r.text, 'html5lib')
tables = soup.find_all('ul', class_='medium-logos')

team_names = []
prefix_1 = []
prefix_2 = []
teams_urls = []
for table in tables:
    for li in table.find_all('li'):
        info = li.h5.a
        # Use a dedicated name so the page `url` above is not overwritten.
        team_url = info['href']
        url_parts = team_url.split('/')
        team_names.append(info.text)
        teams_urls.append(team_url)
        prefix_1.append(url_parts[-2])  # second-to-last path segment
        prefix_2.append(url_parts[-1])  # last path segment

dic = {'url': teams_urls, 'prefix_2': prefix_2, 'prefix_1': prefix_1}
teams = pd.DataFrame(dic, index=team_names)
In [8]:
# URL template for a team's stats page; filled below with the team's
# `prefix_1` value ({0}) and the year ({1}).
BASE_URL = 'http://www.espn.com/nba/team/stats/_/name/{0}/year/{1}'

# Column labels applied to the scraped rows: the player name first, followed
# by the numeric stat columns in the order they are read from each table row.
column_headers = [
    u'PLAYER',
    u'GP', u'GS', u'MIN', u'PPG',
    u'OFFR', u'DEFR', u'RPG',
    u'APG', u'SPG', u'BPG', u'TPG', u'FPG',
    u'A/TO', u'PER',
]
def normalizeColumn(column):
    """Min-max scale the values of ``column`` into the range [0, 1].

    Parameters
    ----------
    column : iterable exposing ``.max()`` and ``.min()`` (e.g. a pandas Series)
        Numeric records to rescale.

    Returns
    -------
    list
        One scaled value per record, in the original order. If every record
        is identical (max == min) each entry is 0.0 instead of the NaN that
        dividing by a zero range would produce.
    """
    theMax = column.max()
    theMin = column.min()
    spread = theMax - theMin
    if spread == 0:
        # A constant column has no range to scale by; returning zeros keeps
        # NaN out of any later sum over the normalized columns.
        return [0.0 for _ in column]
    return [(record - theMin) / spread for record in column]
# For each year, scrape every team's stats page, collect one record per table
# row, add a min-max normalized copy of each numeric column plus their row
# sum, and write the result to "player_stats_<year>.csv".
for year in range(2003, 2018):
    player_data = []
    for index, row in teams.iterrows():
        # Bound the wait so one stalled request cannot hang the whole scrape.
        r = requests.get(BASE_URL.format(row['prefix_1'], year), timeout=30)
        table = BeautifulSoup(r.text, 'html5lib').table
        if table is None:
            # The page has no <table>; skip it instead of crashing on
            # `None.find_all`.
            continue
        # Skip the first two rows (header rows) and the final row.
        # NOTE(review): the final row is presumably a totals row -- confirm.
        for tr in table.find_all('tr')[2:-1]:
            data = [td.getText() for td in tr.find_all('td')]
            # Keep the name as the first field and convert every other field
            # to float so the columns can be used in arithmetic.
            # NOTE(review): on Python 3 `.encode` yields bytes, which would be
            # written to the CSV as b'...' -- confirm the target interpreter.
            formattedData = [data[0].encode('utf-8')] + [float(x) for x in data[1:]]
            player_data.append(formattedData)

    df = pd.DataFrame(player_data, columns=column_headers)
    # Iterate over a slice taken before any column is added, so the newly
    # appended "_Normalized" columns are not themselves normalized.
    for column in df.iloc[:, 1:]:
        df[column + "_Normalized"] = normalizeColumn(df[column])
    # The normalized columns start right after the original ones.
    df['scores_Normalized'] = df.iloc[:, len(column_headers):].apply(sum, axis=1)
    df.to_csv("player_stats_" + str(year) + ".csv")
Two more data sets are needed for this training:
1. The schedule of each game: 'http://espn.go.com/nba/team/schedule/_/name/{0}/year/{1}/{2}'
2. The stats of each player for every game they have played, e.g. "http://www.espn.com/nba/player/gamelog/_/id/1975/carmelo-anthony"