This tool is used to scrape the team schedule from basketballmonster.com
In [3]:
import requests
import bs4
import pandas as pd
from pandas import DataFrame
In [4]:
# Page that holds the team schedule grid.
SCHED_URL = 'https://basketballmonster.com/ScheduleGrid.aspx'

# Download the page; the session is closed as soon as the response is read.
with requests.Session() as s:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = s.get(SCHED_URL, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page.
    r.raise_for_status()

soup = bs4.BeautifulSoup(r.text, "html.parser")

# Look the schedule table up once and reuse it for both queries below.
table = soup.find('table', class_='datatable')
if table is None:
    raise ValueError("no <table class='datatable'> found at " + SCHED_URL)

# Header row: its <td> cells supply the column names.
columns = table.find('tr', class_='gridHeaderTR sportGridColor')
if columns is None:
    raise ValueError("schedule table has no 'gridHeaderTR sportGridColor' header row")

# Every <tr> of the table, header and summary rows included.
weeks = table.find_all('tr')
In [5]:
def _cell_texts(row):
    """Return the whitespace-stripped text of every <td> cell in ``row``."""
    return [cell.text.strip() for cell in row.find_all('td')]


# Column names come from the header row's cells.
cols = _cell_texts(columns)

# Skip the first four and last four <tr> rows (the 'gridGames', 'maxWeek',
# 'qualityGames' and 'gridHeaderTR' rows at the top and bottom of the table),
# then reduce each remaining row to the text of its cells.
weeks = [_cell_texts(row) for row in weeks[4:-4]]
In [6]:
# Build the schedule frame from the scraped rows, labelled with the header
# texts, and use the 'Week' column as the index.
df = pd.DataFrame(data=weeks, columns=cols)
df = df.set_index('Week')

# Remove the column whose header text is empty.
del df['']

# Bare expression: shows the frame as the notebook cell's output.
df
Out[6]:
In [7]:
# Write the schedule as line-delimited JSON (one record object per line).
# `lines` must be a real boolean: a string such as 'TRUE' (or 'FALSE') is
# merely truthy, so it would enable line-delimited output either way.
# NOTE(review): orient='records' does not serialize the index, so the 'Week'
# index is absent from this file -- confirm that is intended.
df.to_json('../data/teamSched.json', orient='records', lines=True)
In [8]:
# Write the schedule as CSV; index=False leaves the frame's index out of the file.
df.to_csv(path_or_buf='../data/teamSched.csv', index=False)