In [1]:
import pandas as pd
import csv
Reading dataset without Box Office
In [3]:
# Load the raw movie table (the variant without box-office figures) and
# preview its first three rows as the cell output.
movie = pd.read_csv(filepath_or_buffer='dataset02.csv')
movie.head(n=3)
Out[3]:
Filtering data with YEAR range 1990-2014 and COUNTRY as USA and LANGUAGE as English
In [4]:
# Keep rows released in 1990-2014 (both ends inclusive) whose COUNTRY text
# contains 'USA' and whose LANGUAGE text contains 'English'.
# na=False makes a missing COUNTRY/LANGUAGE an explicit non-match, so the
# boolean mask never carries NaN and does not depend on implicit coercion.
filteringDataset = movie[
    movie.YEAR.between(1990, 2014)
    & movie.COUNTRY.str.contains('USA', na=False)
    & movie.LANGUAGE.str.contains('English', na=False)
]
Creating a DataFrame and checking for duplicate IMDB IDs
In [14]:
# Wrap the filtered rows in a DataFrame, then show the three most frequent
# IMDB IDs; any count above 1 means that ID appears on more than one row.
datasetDataframe = pd.DataFrame(data=filteringDataset)
datasetDataframe['IMDB ID'].value_counts().head(3)
Out[14]:
One IMDB ID, 'tt2279864', has multiple entries
In [15]:
# Display every row that carries the duplicated IMDB ID so they can be compared.
datasetDataframe.loc[datasetDataframe['IMDB ID'].eq('tt2279864')]
Out[15]:
Getting the correct row of IMDB ID 'tt2279864'
In [18]:
# Select the single row to keep for 'tt2279864' by its index label.
# NOTE(review): 25630 is a hard-coded label, presumably read off the duplicate
# listing for this ID; re-check it whenever dataset02.csv changes.
idtt2279864 = datasetDataframe.loc[25630, :]
Dataframe without IMDB ID 'tt2279864'
In [19]:
# Drop every row whose IMDB ID is 'tt2279864'; all other rows are kept.
without_tt2279864 = datasetDataframe.loc[datasetDataframe['IMDB ID'].ne('tt2279864')]
Appending the single entry of IMDB ID 'tt2279864' to Dataframe without IMDB ID 'tt2279864'
In [20]:
# Add the one retained 'tt2279864' row back onto the frame that excludes it.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# rows are combined with pd.concat. The retained row is a Series: to_frame().T
# turns it into a one-row frame, and infer_objects() restores the column
# dtypes that the transpose flattens to object. ignore_index=True renumbers
# the result 0..n-1, as the original append call did.
finalDataset = pd.concat(
    [without_tt2279864, idtt2279864.to_frame().T.infer_objects()],
    ignore_index=True,
)
No duplicate IMDB IDs remain
In [21]:
# Re-check the three most frequent IMDB IDs; each count should now be 1.
finalDataset['IMDB ID'].value_counts().head(3)
Out[21]:
Writing the DataFrame to a CSV file
In [22]:
# Persist the de-duplicated table; index=False leaves the row index out of the file.
finalDataset.to_csv(path_or_buf='datasetWithoutBoxOffice.csv', index=False)
In [ ]: