In [10]:
import pandas as pd
import math
import csv
Read the average ticket price by year (used for the inflation adjustment)
In [21]:
# Load the per-year ticket price table; later cells read its
# 'YEAR' and 'AVG. PRICE' columns.
ticketPriceByYear = pd.read_csv(filepath_or_buffer='ticketPriceInflation.csv')
# Preview the first three rows as the cell output.
ticketPriceByYear.head(n=3)
Out[21]:
Read dataset with box office
In [22]:
# Load the movie dataset; later cells read its 'IMDB ID' column and
# address two further columns by position.
movie = pd.read_csv(filepath_or_buffer='datasetWithBoxoffice.csv')
# Preview the first three rows as the cell output.
movie.head(n=3)
Out[22]:
Getting 2014 Ticket price
In [14]:
# Reference price for the adjustment: the 'AVG. PRICE' of the first row whose
# 'YEAR' equals 2014 (raises IndexError if the table has no 2014 row).
ticketPrice2014 = ticketPriceByYear.loc[ticketPriceByYear['YEAR'].eq(2014), 'AVG. PRICE'].values[0]
Function to calculate Adjusted Box Office
In [15]:
def calculateAdjustedBoxOffice(year, boxOffice):
    """Re-express a box-office figure at 2014 ticket prices.

    The figure is divided by the average ticket price of ``year`` (an
    estimated number of tickets) and multiplied by the 2014 average price.

    Relies on two notebook-level names being defined before the call:
    ``ticketPriceByYear`` (a DataFrame with 'YEAR' and 'AVG. PRICE' columns)
    and ``ticketPrice2014``.

    Parameters
    ----------
    year : number
        Year whose average ticket price applies to ``boxOffice``.
    boxOffice : number
        Box-office figure to adjust; may be NaN.

    Returns
    -------
    The unchanged ``boxOffice`` when ``year`` is 2014 or ``boxOffice`` is NaN;
    NaN when no usable ticket price exists for ``year``; otherwise the adjusted
    figure rounded down with ``math.floor``.
    """
    # Already at 2014 prices, or nothing to adjust: hand the input back as-is.
    if year == 2014 or math.isnan(boxOffice):
        return boxOffice

    pricesForYear = ticketPriceByYear.loc[ticketPriceByYear['YEAR'] == year, 'AVG. PRICE']
    # A year absent from the price table (or a NaN year, which never compares
    # equal) yields an empty selection. Indexing it with [0] would abort the
    # whole run with an opaque IndexError, so report "unknown" (NaN) instead.
    if pricesForYear.empty:
        return float('nan')

    ticketPrice = pricesForYear.values[0]
    # A NaN price would propagate into math.floor, which raises ValueError.
    if math.isnan(ticketPrice):
        return float('nan')

    estimatedTickets = boxOffice / ticketPrice
    return math.floor(estimatedTickets * ticketPrice2014)
Build a list with the IMDB IDs and their adjusted box office values
In [16]:
# Two parallel lists: [0] holds every IMDB ID, [1] the adjusted box office
# computed for the same row.
# NOTE(review): row positions 3 and 11 are passed as (year, boxOffice);
# presumably the release-year and box-office columns - confirm against the
# dataset's column order.
listAdjustTicketPriceInflation = [
    list(movie['IMDB ID']),
    [calculateAdjustedBoxOffice(row[3], row[11]) for row in movie.values],
]
Create a csv file
In [17]:
# Write the (IMDB ID, adjusted box office) pairs to a two-column CSV.
# newline='' is what the csv module expects of the file object it writes to;
# without it, platforms that translate '\n' to '\r\n' end every record with
# '\r\r\n', which shows up as a blank line after each row.
with open('adjustedBoxOffice.csv', 'w', newline='') as csvfile:
    fieldnames = ['IMDB ID', 'ADJ. BOX OFFICE']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # The two inner lists are parallel (one entry per movie row), so pair them up.
    for imdbID, adjustedValue in zip(listAdjustTicketPriceInflation[0],
                                     listAdjustTicketPriceInflation[1]):
        writer.writerow({'IMDB ID': imdbID, 'ADJ. BOX OFFICE': adjustedValue})
In [18]:
# Read the CSV written by the previous cell back into a DataFrame.
adjustedBoxOffice = pd.read_csv(filepath_or_buffer='adjustedBoxOffice.csv')
# Preview the first three rows as the cell output.
adjustedBoxOffice.head(n=3)
Out[18]:
Merge the adjusted box office values into the movie dataset, using IMDB ID as the key
In [19]:
# Inner join on the shared 'IMDB ID' column: only IDs present in both frames
# are kept, and each movie row gains its 'ADJ. BOX OFFICE' value.
result = pd.merge(left=movie, right=adjustedBoxOffice, how='inner', on='IMDB ID')
# Preview the first three rows as the cell output.
result.head(n=3)
Out[19]:
Save the merged DataFrame to a CSV file
In [20]:
# Persist the merged frame; index=False keeps the row index out of the file.
result.to_csv(path_or_buf='finalDataset.csv', index=False)
In [ ]: