In [5]:
# Shell escape: list the contents of the parent directory of the current working directory.
!ls ../


LICENSE            notebooks          sample-files
data               parsed-movie-files

In [7]:
from pandas import DataFrame, read_csv
import pandas as pd
# Load the box-office CSV into a DataFrame (default integer index).
# NOTE(review): absolute, machine-specific path — presumably equivalent to
# '../data/final-box-office-data.csv' from the notebook's directory; confirm before changing.
df = pd.read_csv(r'/Users/seanreed1/AnacondaProjects/scrapy-projects/movie-project/box-office-data/data/final-box-office-data.csv')

In [9]:
# Preview the first five rows of the loaded data.
df.head(n=5)


Out[9]:
Rank Title Release_Year Domestic Box Office International Box Office Total Box Office
0 1 Star Wars Ep. VII: The Force Awakens 2015 $936,662,225 $1,122,000,000 $2,058,662,225
1 2 Avatar 2009 $760,507,625 $2,023,411,357 $2,783,918,982
2 3 Titanic 1997 $658,672,302 $1,548,943,366 $2,207,615,668
3 4 Jurassic World 2015 $652,198,010 $1,019,442,583 $1,671,640,593
4 5 The Avengers 2012 $623,279,547 $896,200,000 $1,519,479,547

In [10]:
# Show rows labelled 0 through 10 (inclusive) with all columns.
# `.ix` was deprecated in pandas 0.20 and removed in 1.0; `.loc` performs the
# same label-based, end-inclusive slice on the default integer index.
df.loc[:10, :]


Out[10]:
Rank Title Release_Year Domestic Box Office International Box Office Total Box Office
0 1 Star Wars Ep. VII: The Force Awakens 2015 $936,662,225 $1,122,000,000 $2,058,662,225
1 2 Avatar 2009 $760,507,625 $2,023,411,357 $2,783,918,982
2 3 Titanic 1997 $658,672,302 $1,548,943,366 $2,207,615,668
3 4 Jurassic World 2015 $652,198,010 $1,019,442,583 $1,671,640,593
4 5 The Avengers 2012 $623,279,547 $896,200,000 $1,519,479,547
5 6 The Dark Knight 2008 $533,345,358 $469,451,702 $1,002,797,060
6 7 Rogue One: A Star Wars Story 2016 $532,177,324 $518,811,164 $1,050,988,488
7 8 Beauty and the Beast 2017 $504,014,165 $756,108,198 $1,260,122,363
8 9 Finding Dory 2016 $486,295,561 $536,321,815 $1,022,617,376
9 10 Star Wars Ep. I: The Phantom Menace 1999 $474,544,677 $552,500,000 $1,027,044,677
10 11 Star Wars Ep. IV: A New Hope 1977 $460,998,007 $325,600,000 $786,598,007

In [17]:
# Keep only the title and release-year columns; these feed the OMDB process chain.
title_plus_year = df.loc[:, ['Title', 'Release_Year']]
title_plus_year.head(n=5)


Out[17]:
Title Release_Year
0 Star Wars Ep. VII: The Force Awakens 2015
1 Avatar 2009
2 Titanic 1997
3 Jurassic World 2015
4 The Avengers 2012

In [25]:
# Write the title/year subset to CSV without the index column.
title_plus_year.to_csv(
    path_or_buf=r'/Users/seanreed1/AnacondaProjects/scrapy-projects/movie-project/box-office-data/data/title_plus_year.csv',
    index=False,
)

In [30]:
# Read the exported CSV back in to verify that it was saved correctly.
title_plus_year2 = pd.read_csv(r'/Users/seanreed1/AnacondaProjects/scrapy-projects/movie-project/box-office-data/data/title_plus_year.csv')

In [31]:
# First five rows of the re-loaded file.
title_plus_year2.head(n=5)


Out[31]:
Title Release_Year
0 Star Wars Ep. VII: The Force Awakens 2015
1 Avatar 2009
2 Titanic 1997
3 Jurassic World 2015
4 The Avengers 2012

In [32]:
# Last five rows of the re-loaded file.
title_plus_year2.tail(n=5)


Out[32]:
Title Release_Year
6095 Heart Condition 1990
6096 Venom 1982
6097 Partners 1982
6098 Kama Sutra 1997
6099 Serbuan maut 2012

In [33]:
# OK, the data has been saved. Next up: the OMDB data downloader.

In [ ]: