In [1]:
from datetime import datetime
import urllib.request
from bs4 import BeautifulSoup

In [2]:
url = "http://www.smh.com.au/business/markets/52-week-highs?page=-1"
resp = urllib.request.urlopen(url)
soup = BeautifulSoup(resp.read(), 'lxml')
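
A slightly more defensive version of this fetch would set an explicit User-Agent and a timeout, in case the site is slow or rejects urllib's default agent. This is an optional sketch, not the call that produced the results below; the header value and the 10-second timeout are illustrative assumptions.

In [ ]:
# Optional, more defensive fetch: explicit User-Agent header and a timeout (both assumed values).
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
with urllib.request.urlopen(req, timeout=10) as resp:
    soup = BeautifulSoup(resp.read(), 'lxml')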

In [11]:
# Get date of new highs
date_string = soup.select("#content > section > header > p > time")[0].text
print("Date of new highs: {0}".format(date_string))
date = datetime.strptime(date_string, "%B %d, %Y")
# Primarily looking at weekly data, so group by ISO week of year (see the grouping sketch below).
_, wk, day = date.isocalendar()
print("Week of year: {0}, Day of week: {1}".format(wk, day))


Date of new highs: February 9, 2015
Week of year: 7, Day of week: 1

In [5]:
# One entry per table row: the stock listed as making a new 52-week high.
new_highs = [elem.text for elem in soup.select("#content > section > table > tbody > tr > th > a")]
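
Since the aim is weekly data, a minimal sketch of the grouping step might look like the cell below. The highs_by_week accumulator and its (ISO year, ISO week) keying are assumptions for illustration, not part of the scrape itself.

In [ ]:
# Hypothetical accumulator: (ISO year, ISO week) -> set of stocks that made a new 52-week high.
highs_by_week = {}
iso_year, iso_week, _ = date.isocalendar()
highs_by_week.setdefault((iso_year, iso_week), set()).update(new_highs)
print("Week {0}: {1} stocks at new highs".format(iso_week, len(highs_by_week[(iso_year, iso_week)])))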
