This tool scrapes the NBA team schedule grid from basketballmonster.com.


In [3]:
import requests
import bs4

import pandas as pd
from pandas import DataFrame

In [4]:
SCHED_URL = 'https://basketballmonster.com/ScheduleGrid.aspx'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can wait indefinitely

# Download the schedule grid page and locate the rows of the schedule table.
# Produces:
#   columns - the header <tr> of the table (class 'gridHeaderTR sportGridColor')
#   weeks   - every <tr> found inside the table (header/summary rows included)
with requests.Session() as s:
    r = s.get(SCHED_URL, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx so an error page is never parsed as the schedule.
    r.raise_for_status()

    soup = bs4.BeautifulSoup(r.text, "html.parser")

    # Look the table up once and reuse it for both queries below.
    table = soup.find('table', class_='datatable')
    if table is None:
        raise ValueError("No <table class='datatable'> found at " + SCHED_URL)

    columns = table.find('tr', class_='gridHeaderTR sportGridColor')
    if columns is None:
        raise ValueError("Schedule table has no 'gridHeaderTR sportGridColor' header row")

    weeks = table.find_all('tr')

In [5]:
# Header labels: the stripped text of every <td> in the header row.
cols = [header_cell.text.strip() for header_cell in columns.find_all('td')]

# Skip the first four and last four <tr> elements (the 'gridGames', 'maxWeek',
# 'qualityGames' and 'gridHeaderTR' rows at the top and bottom of the table),
# then reduce each remaining row to the stripped text of its cells.
weeks = [
    [cell.text.strip() for cell in row.find_all('td')]
    for row in weeks[4:-4]
]

In [6]:
# Build the schedule frame: one row per week, indexed by the 'Week' column,
# with the blank-named column removed.
df = (
    DataFrame(weeks, columns=cols)
    .set_index('Week')
    .drop(columns='')
)
df


Out[6]:
Date ATL BKN BOS CHA CHI CLE DAL DEN DET ... OKC ORL PHI PHO POR SAC SAS TOR UTA WAS
Week
1 10/16/2017 3 3 3 2 2 3 3 2 3 ... 3 3 3 3 3 3 2 2 3 2
2 10/23/2017 4 4 3 4 3 4 4 4 4 ... 3 3 3 3 3 3 4 3 3 4
3 10/30/2017 3 2 4 4 3 3 4 4 3 ... 3 4 3 4 4 3 4 4 4 3
4 11/6/2017 3 4 4 2 3 3 3 3 3 ... 4 3 3 4 2 3 3 3 3 3
5 11/13/2017 3 3 3 3 3 3 3 3 3 ... 2 3 3 4 4 4 3 4 4 4
6 11/20/2017 4 3 4 4 4 3 3 3 2 ... 4 4 3 3 4 3 3 3 3 3
7 11/27/2017 2 3 3 2 3 4 3 3 4 ... 3 4 4 3 3 4 4 2 3 3
8 12/4/2017 4 3 4 4 4 4 4 4 4 ... 3 4 4 4 2 3 4 3 4 4
9 12/11/2017 3 4 4 4 3 4 3 3 4 ... 4 3 2 3 4 3 3 4 3 4
10 12/18/2017 4 3 4 4 4 2 4 4 2 ... 4 3 4 4 4 3 4 3 4 3
11 12/25/2017 3 4 4 3 4 3 4 3 3 ... 4 3 4 3 2 4 3 3 3 4
12 1/1/2018 3 3 3 2 4 3 3 3 3 ... 3 3 2 4 4 2 4 3 3 3
13 1/8/2018 3 4 1 3 3 3 3 4 3 ... 3 3 1 2 4 4 3 4 2 3
14 1/15/2018 3 4 3 3 3 3 2 3 4 ... 3 3 3 2 3 3 4 4 4 3
15 1/22/2018 4 3 3 4 4 3 4 3 3 ... 4 2 4 4 3 4 4 3 3 3
16 1/29/2018 4 4 4 4 2 3 3 4 3 ... 4 3 4 4 4 3 3 4 3 3
17 2/5/2018 4 3 4 4 3 4 4 3 4 ... 3 4 3 3 4 3 2 3 4 4
18 2/12/2018 2 2 1 1 2 1 1 2 2 ... 2 2 2 2 1 2 2 2 2 1
19 2/19/2018 1 1 2 3 2 3 2 2 2 ... 2 2 3 2 2 2 2 1 2 3
20 2/26/2018 4 4 3 4 3 3 4 3 4 ... 4 4 4 4 3 5 2 4 3 4
21 3/5/2018 3 3 3 3 4 4 3 4 3 ... 3 4 3 3 3 3 3 4 4 3
22 3/12/2018 3 3 3 3 3 3 3 3 3 ... 4 3 3 3 4 4 4 4 3 3
23 3/19/2018 4 4 3 4 4 4 3 3 4 ... 3 3 4 3 3 3 4 4 4 3
24 3/26/2018 3 3 3 4 4 4 4 4 4 ... 3 3 4 4 4 4 3 2 3 4
25 4/2/2018 4 3 4 3 3 3 4 3 3 ... 2 4 4 3 3 2 3 4 3 3
26 4/9/2018 1 2 2 1 2 2 1 2 2 ... 2 2 2 1 2 2 2 2 2 2

26 rows × 31 columns


In [7]:
# Write one JSON object per line (JSON Lines). `lines` is a boolean flag; the
# original passed the string 'TRUE', which only worked because it is truthy.
# NOTE(review): orient='records' does not write the index, so 'Week' is not in
# the output file - confirm that is intended.
df.to_json('../data/teamSched.json', orient='records', lines=True)

In [8]:
# Save the schedule as CSV. index=False leaves the index ('Week') out of the file.
df.to_csv(path_or_buf='../data/teamSched.csv', index=False)