In [39]:
import pandas as pd
Reading csv of 1000 entries
In [40]:
# Load the first batch of box-office rows from disk.
movie_1000 = pd.read_csv(filepath_or_buffer='movie_with_boxoffice_1000.csv')
# Preview the first three rows as the cell output.
movie_1000.head(n=3)
Out[40]:
Reading remaining entries
In [41]:
# Load the remaining box-office batches. Every file is named
# 'movie_with_boxoffice_<suffix>.csv', so the suffixes are listed once and the
# files are read in a single pass instead of nine copy-pasted calls.
batch_suffixes = (2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10060)

# The individual names are kept because the cell that builds `boxoffice`
# refers to each batch by name. Unpacking also fails loudly if the number
# of suffixes and the number of target names ever get out of sync.
(movie_2000, movie_3000, movie_4000, movie_5000, movie_6000,
 movie_7000, movie_8000, movie_9000, movie_10060) = [
    pd.read_csv('movie_with_boxoffice_{}.csv'.format(suffix))
    for suffix in batch_suffixes
]
Dataframe with all entries
In [42]:
# Stack all batches into a single frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so
# pd.concat is used instead; it is the call append delegated to internally.
boxoffice = pd.concat(
    [
        movie_1000, movie_2000, movie_3000, movie_4000, movie_5000,
        movie_6000, movie_7000, movie_8000, movie_9000, movie_10060,
    ],
    ignore_index=True,      # discard per-file row labels, renumber 0..n-1
    # Kept from the original call. With ignore_index=True the result gets a
    # fresh index, so this check is not expected to fail; it does NOT detect
    # duplicate rows or duplicate 'IMDB ID' values.
    verify_integrity=True,
)
# Optional: uncomment to persist the combined box-office table.
# boxoffice.to_csv('boxoffice.csv', index = False)
Reading dataset without boxoffice
In [43]:
# Load the movie dataset that does not yet carry box-office columns.
datasetWithoutBoxoffice = pd.read_csv(filepath_or_buffer='datasetWithoutBoxoffice.csv')
# Preview the first three rows as the cell output.
datasetWithoutBoxoffice.head(n=3)
Out[43]:
Merging using IMDB ID as key
In [44]:
# Join the two tables on their shared 'IMDB ID' column. An inner join keeps
# only the IDs that are present in both frames.
result = pd.merge(
    datasetWithoutBoxoffice,
    boxoffice,
    on='IMDB ID',
    how='inner',
)
# Preview the first three merged rows as the cell output.
result.head(n=3)
Out[44]:
Writing the merged DataFrame to a CSV file
In [45]:
# Write the merged table to disk; index=False leaves the row index out of the file.
result.to_csv(path_or_buf='datasetWithBoxoffice.csv', index=False)
In [ ]: