In [138]:

    
import pandas as pd
import csv
import numpy as np
import matplotlib.pyplot
import math



In [139]:

    
title_file = "titles.csv"
release_date_file = "release_dates.csv"
cast_file = "cast.csv"

files = [title_file, release_date_file,cast_file]
dataframes = {}

for f in files:
    csv_file_object = pd.read_csv(open(f), header=0)
    dataframes[f] = csv_file_object

Question 1: How many movies are listed in the titles dataframe?

210591

Bonus

Unique movies == 192839



In [140]:

    
dataframes['titles.csv'].count()
len(dataframes['titles.csv'].ix[:,0].unique())









    Out[140]:





192839

Question 2: What are the earliest two films listed in the titles dataframe?

Miss Jerry (1894) && Reproduction of the Corbett and Fitzsimmons Fight (1897)



In [141]:

    
dataframes['titles.csv'].sort_values(['year']).iloc[0:2]









    Out[141]:






  
    
      
      title
      year
    
  
  
    
      161037
      Miss Jerry
      1894
    
    
      83650
      Reproduction of the Corbett and Fitzsimmons Fight
      1897

Question 3: How many movies have the title "Hamlet"?



In [142]:

    
len(np.where(dataframes['titles.csv']['title'] == 'Hamlet')[0])









    Out[142]:





19

Question 4: How many movies are titled "North by Northwest"?



In [143]:

    
len(np.where(dataframes['titles.csv']['title'] == 'North by Northwest')[0])









    Out[143]:





1

Question 5: When was the first movie titled "Hamlet" made?

1910



In [144]:

    
dataframes['titles.csv'].loc[(dataframes['titles.csv']['title']=='Hamlet'), 'year'].sort_values().iloc[0]









    Out[144]:





1910

Question 6: List all of the "Treasure Island" movies from earliest to most recent

1918
1920
1934
1950
1972
1973
1985
1999



In [145]:

    
dataframes['titles.csv'].loc[(dataframes['titles.csv']['title']=='Treasure Island'), 'year'].sort_values()









    Out[145]:





186595    1918
46520     1920
188082    1934
88003     1950
207718    1972
101151    1973
186007    1985
162493    1999
Name: year, dtype: int64

Question 7. How many movies were made in the year 1950?

1033



In [147]:

    
dataframes['titles.csv'].loc[(dataframes['titles.csv']['year']==1950), 'year'].count()









    Out[147]:





1033

Question 8. How many movies were made from 1950 through 1959?

11999



In [149]:

    
dataframes['titles.csv'].loc[(dataframes['titles.csv']['year']>=1950) & (dataframes['titles.csv']['year']<=1959), 'year'].count()









    Out[149]:





11999

Question 9. In what years has a movie titled "Batman" been released?



In [126]:

    
len(dataframes['titles.csv'].loc[(dataframes['titles.csv']['title']=='Batman'), 'year'].unique())









    Out[126]:





2

Question 10. How many roles were there in the movie "Inception"?



In [150]:

    
dataframes['cast.csv'].loc[(dataframes['cast.csv']['title']=='Inception'), 'character'].count()









    Out[150]:





72

WE GIVE UP



In [137]:

    
dataframes['titles.csv'].groupby('year')
#dataframes['titles.csv']['decade']= pd.Series(math.floor(dataframes['titles.csv']['year'].apply(int)/10.0)*10)
#math.floor(dataframes['titles.csv']['year'].apply(float))









    Out[137]:





<pandas.core.groupby.DataFrameGroupBy object at 0x263bf0b90>



In [ ]:

	title	year
161037	Miss Jerry	1894
83650	Reproduction of the Corbett and Fitzsimmons Fight	1897