Extracting Ticket Inflation Price Table


In [8]:
import requests
from bs4 import BeautifulSoup
import csv

Request the URL with the `requests` library and get the text content of the server's response


In [9]:
url = 'http://www.boxofficemojo.com/about/adjuster.htm'

# Download the page and keep its decoded HTML in `requestUrlText`.
# - `timeout` stops the cell from hanging forever if the server never answers.
# - `raise_for_status()` turns HTTP error responses (4xx/5xx) into an exception
#   so an error page is never parsed as if it were the price table.
# - The exception is re-raised after the message is printed: otherwise
#   `requestUrlText` would be undefined (or stale from a previous run) and the
#   cells below would fail or silently work on old data.
try:
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    requestUrlText = response.text
except requests.exceptions.RequestException:
    print ('URL Error!!!')
    raise

Parse the document using BeautifulSoup


In [10]:
# Build the parse tree from the downloaded HTML, using the lxml parser.
soup = BeautifulSoup(markup=requestUrlText, features='lxml')

Selecting range of required rows


In [11]:
# Locate the first element carrying justify="right", collect every <td>
# beneath it, and drop the first two cells.
# NOTE(review): the two skipped cells are presumably the column headers -
# confirm against the live page.
rangeOfTable = (
    soup
    .find(attrs={'justify': 'right'})
    .find_all('td')[2:]
)

Build a list holding two parallel lists:

  • First Index - Year
  • Second Index - Avg. Price

In [12]:
# Split the cells into two parallel lists:
#   [0] text of every cell that does not contain '$'
#   [1] for every cell that contains '$', the text right after the first '$'
inflationTicketTableList = [
    [cell.text for cell in rangeOfTable if '$' not in cell.text],
    [cell.text.split('$')[1] for cell in rangeOfTable if '$' in cell.text],
]

Creating a CSV file


In [13]:
# Write the (year, average price) pairs to ticketPriceInflation.csv.
years, prices = inflationTicketTableList[0], inflationTicketTableList[1]

# Every year needs a matching price. Check before opening the file so a
# mismatch does not leave a half-written CSV behind.
if len(years) > len(prices):
    raise IndexError('fewer prices (%d) than years (%d)' % (len(prices), len(years)))

# newline='' is required by the csv module; without it each row is followed by
# an extra blank line on platforms that translate '\n' to '\r\n'.
with open('ticketPriceInflation.csv', 'w', newline='') as csvfile:
    fieldnames = ['YEAR','AVG. PRICE']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # zip stops at the last year, so any surplus prices are ignored exactly as
    # the index-based loop ignored them.
    writer.writerows(
        {'YEAR': year, 'AVG. PRICE': price}
        for year, price in zip(years, prices)
    )

In [ ]: