In [1]:
%matplotlib inline
import pandas as pd

In [2]:
from IPython.display import HTML

# Read both stylesheets through context managers so each file handle is
# closed as soon as its contents are read, then concatenate them in the
# same order as before (table styles first, notebook styles second).
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# The last expression is an HTML object wrapping the CSS in a <style> tag.
HTML('<style>{}</style>'.format(css))


Out[2]:

In [3]:
# Load the film titles table from CSV and preview its first five rows.
titles = pd.read_csv('data/titles.csv')
titles.head(5)


Out[3]:
title year
0 Tomorrow Ends at Dawn 2002
1 Brothers of the West 1937
2 Nemo 1984
3 Pereezd 2014
4 Bad for Business 2007

In [4]:
# Load the cast table from CSV and preview its first five rows.
cast = pd.read_csv('data/cast.csv')
cast.head(5)


Out[4]:
title year name type character n
0 Suuri illusioni 1985 Homo $ actor Guests 22
1 Gangsta Rap: The Glockumentary 2007 Too $hort actor Himself NaN
2 Menace II Society 1993 Too $hort actor Lew-Loc 27
3 Porndogs: The Adventures of Sadie 2009 Too $hort actor Bosco 3
4 Stop Pepper Palmer 2014 Too $hort actor Himself NaN

In [5]:
# Number of rows (one per listed movie) in the titles dataframe.

len(titles.index)


Out[5]:
209286

In [6]:
# The two films with the earliest release years in the titles dataframe.

titles_by_year = titles.sort_values(by='year')
titles_by_year.head(2)


Out[6]:
title year
2756 Miss Jerry 1894
132322 Reproduction of the Corbett and Fitzsimmons Fight 1897

In [7]:
# Count the movies whose title is exactly "Hamlet".

is_hamlet = titles['title'] == 'Hamlet'
len(titles[is_hamlet])


Out[7]:
19

In [8]:
# Count the movies whose title is exactly "North by Northwest".

is_north_by_northwest = titles['title'] == 'North by Northwest'
len(titles[is_north_by_northwest])


Out[8]:
1

In [9]:
# Earliest movie titled "Hamlet": sort the matches by year, keep the first row.

hamlet_titles = titles[titles['title'] == 'Hamlet']
hamlet_titles.sort_values(by='year').head(1)


Out[9]:
title year
119520 Hamlet 1910

In [10]:
# Every movie titled "Treasure Island", ordered from earliest to most recent.

treasure_island = titles[titles['title'] == 'Treasure Island']
treasure_island.sort_values(by='year')


Out[10]:
title year
89106 Treasure Island 1918
196051 Treasure Island 1920
154459 Treasure Island 1934
174074 Treasure Island 1950
72215 Treasure Island 1972
66550 Treasure Island 1973
18201 Treasure Island 1985
80930 Treasure Island 1999

In [11]:
# Count the movies released in 1950.

released_1950 = titles['year'] == 1950
len(titles[released_1950])


Out[11]:
1029

In [12]:
# Count the movies released in 1960.

released_1960 = titles['year'] == 1960
len(titles[released_1960])


Out[12]:
1419

In [13]:
# Count the movies released from 1950 through 1959 (both years inclusive),
# using an explicit lower and upper bound on the year.

t = titles
in_fifties = (t['year'] >= 1950) & (t['year'] <= 1959)
len(t[in_fifties])


Out[13]:
11989

In [14]:
# Same count, selecting rows whose year floor-divided by 10 equals 195.
decade = t['year'] // 10
len(t[decade == 195])


Out[14]:
11989

In [15]:
# Rows (title and year) for every movie titled exactly "Batman".

t = titles
is_batman = t['title'] == 'Batman'
t[is_batman]


Out[15]:
title year
535 Batman 1943
132676 Batman 1989

In [16]:
# Count the cast rows (roles) recorded for the movie "Inception".

inception_roles = cast[cast['title'] == 'Inception']
len(inception_roles)


Out[16]:
72

In [17]:
# Count the "Inception" roles whose "n" rank is missing (NaN).

inception_roles = cast[cast['title'] == 'Inception']
unranked_roles = inception_roles[inception_roles['n'].isna()]
len(unranked_roles)


Out[17]:
21

In [18]:
# Count the "Inception" roles that do have an "n" rank (non-NaN).

inception_roles = cast[cast['title'] == 'Inception']
ranked_roles = inception_roles[inception_roles['n'].notna()]
len(ranked_roles)


Out[18]:
51

In [19]:
# Cast of "North by Northwest" ordered by "n"; rows whose "n" is missing
# are dropped before sorting.

nbnw_roles = cast[cast['title'] == 'North by Northwest']
nbnw_ranked = nbnw_roles[nbnw_roles['n'].notna()]
nbnw_ranked.sort_values(by='n')


Out[19]:
title year name type character n
757365 North by Northwest 1959 Cary Grant actor Roger O. Thornhill 1
3017825 North by Northwest 1959 Eva Marie Saint actress Eve Kendall 2
1265989 North by Northwest 1959 James Mason actor Phillip Vandamm 3
2716993 North by Northwest 1959 Jessie Royce Landis actress Clara Thornhill 4
308625 North by Northwest 1959 Leo G. Carroll actor The Professor 5
2627970 North by Northwest 1959 Josephine Hutchinson actress Mrs. Townsend 6
1473804 North by Northwest 1959 Philip Ober actor Lester Townsend 7
1107146 North by Northwest 1959 Martin Landau actor Leonard 8
2122318 North by Northwest 1959 Adam Williams actor Valerian 9
1573755 North by Northwest 1959 Edward Platt actor Victor Larrabee 10
578418 North by Northwest 1959 Robert Ellenstein actor Licht 11
1990909 North by Northwest 1959 Les Tremayne actor Auctioneer 12
402492 North by Northwest 1959 Philip Coolidge actor Dr. Cross 13
1311473 North by Northwest 1959 Patrick McVey actor Sergeant Flamm 14
176953 North by Northwest 1959 Edward Binns actor Captain Junket 15
1202896 North by Northwest 1959 Ken Lynch actor Charley - Chicago Policeman 16

In [20]:
# Full cast of the 1972 film "Sleuth", sorted by the "n" column.

is_sleuth_1972 = (cast['title'] == 'Sleuth') & (cast['year'] == 1972)
sleuth_1972 = cast[is_sleuth_1972]
sleuth_1972.sort_values(by='n')


Out[20]:
title year name type character n
1482373 Sleuth 1972 Laurence Olivier actor Andrew Wyke 1
282327 Sleuth 1972 Michael Caine actor Milo Tindle 2
323726 Sleuth 1972 Alec Cawthorne actor Inspector Doppler 3
1273111 Sleuth 1972 John (II) Matthews actor Detective Sergeant Tarrant 4
2356004 Sleuth 1972 Eve (III) Channing actress Marguerite Wyke 5
1258789 Sleuth 1972 Teddy Martin actor Police Constable Higgs 6

In [21]:
# Full cast of the 2007 version of "Sleuth", sorted by the "n" column.

is_sleuth_2007 = (cast['title'] == 'Sleuth') & (cast['year'] == 2007)
sleuth_2007 = cast[is_sleuth_2007]
sleuth_2007.sort_values(by='n')


Out[21]:
title year name type character n
282328 Sleuth 2007 Michael Caine actor Andrew 1
1123295 Sleuth 2007 Jude Law actor Milo 2
1569094 Sleuth 2007 Harold Pinter actor Man on T.V. 3
223724 Sleuth 2007 Kenneth Branagh actor Other Man on T.V. NaN
323727 Sleuth 2007 Alec (II) Cawthorne actor Inspector Doppler NaN
2356003 Sleuth 2007 Eve (II) Channing actress Marguerite Wyke NaN
2895151 Sleuth 2007 Carmel O'Sullivan actress Maggie NaN

In [22]:
# Count the roles credited in the 1921 "Hamlet".

hamlet_roles = cast[cast['title'] == 'Hamlet']
hamlet_1921 = hamlet_roles[hamlet_roles['year'] == 1921]
len(hamlet_1921)


Out[22]:
9

In [23]:
# Count the roles credited in the 1996 "Hamlet" (Branagh's version).

hamlet_roles = cast[cast['title'] == 'Hamlet']
hamlet_1996 = hamlet_roles[hamlet_roles['year'] == 1996]
len(hamlet_1996)


Out[23]:
55

In [24]:
# Count every cast row, across all films, whose character is exactly "Hamlet".

plays_hamlet = cast['character'] == 'Hamlet'
len(cast[plays_hamlet])


Out[24]:
81

In [25]:
# How many people have played an "Ophelia"?

ophelia_roles = cast[cast['character'] == 'Ophelia']
len(ophelia_roles)

# The row count above was the original answer (it is not the cell's last
# expression, so it is not displayed).
# 9peppe on GitHub pointed out that it is wrong; count distinct names instead:

ophelia_names = cast[cast['character'] == "Ophelia"]['name'].unique()
len(ophelia_names)

# Can you work out why the two answers are different?


Out[25]:
93

In [26]:
# How many people have played a role called "The Dude"?

# Count distinct names rather than rows: the question asks about people,
# and the same name can appear on more than one row for this character.
dude_roles = cast[cast['character'] == 'The Dude']
len(dude_roles['name'].unique())


Out[26]:
16

In [27]:
# Count the distinct names credited with a role called "The Stranger".

stranger_roles = cast[cast['character'] == 'The Stranger']
stranger_names = stranger_roles['name'].unique()
len(stranger_names)


Out[27]:
186

In [28]:
# Count the cast rows (roles) credited to Sidney Poitier.

is_poitier = cast['name'] == 'Sidney Poitier'
len(cast[is_poitier])


Out[28]:
43

In [29]:
# Count the cast rows (roles) credited to Judi Dench.

is_dench = cast['name'] == 'Judi Dench'
len(cast[is_dench])


Out[29]:
51

In [30]:
# Cary Grant's supporting roles (n == 2) from the 1940s, ordered by year.

is_grant = cast['name'] == 'Cary Grant'
in_forties = cast['year'] // 10 == 194
is_supporting = cast['n'] == 2
grant_supporting = cast[is_grant & in_forties & is_supporting]
grant_supporting.sort_values(by='year')


Out[30]:
title year name type character n
757362 My Favorite Wife 1940 Cary Grant actor Nick 2
757372 Penny Serenade 1941 Cary Grant actor Roger Adams 2

In [31]:
# Cary Grant's leading roles (n == 1) from the 1940s, ordered by year.

is_grant = cast['name'] == 'Cary Grant'
in_forties = cast['year'] // 10 == 194
is_lead = cast['n'] == 1
grant_leads = cast[is_grant & in_forties & is_lead]
grant_leads.sort_values(by='year')


Out[31]:
title year name type character n
757387 The Howards of Virginia 1940 Cary Grant actor Matt Howard 1
757344 His Girl Friday 1940 Cary Grant actor Walter Burns 1
757389 The Philadelphia Story 1940 Cary Grant actor C.K. Dexter Haven 1
757377 Suspicion 1941 Cary Grant actor Johnnie 1
757391 The Talk of the Town 1942 Cary Grant actor Leopold Dilg 1
757368 Once Upon a Honeymoon 1942 Cary Grant actor Patrick 'Pat' O'Toole 1
757335 Destination Tokyo 1943 Cary Grant actor Capt. Cassidy 1
757360 Mr. Lucky 1943 Cary Grant actor Joe Adams 1
757361 Mr. Lucky 1943 Cary Grant actor Joe Bascopolous 1
757369 Once Upon a Time 1944 Cary Grant actor Jerry Flynn 1
757327 Arsenic and Old Lace 1944 Cary Grant actor Mortimer Brewster 1
757364 None But the Lonely Heart 1944 Cary Grant actor Ernie Mott 1
757363 Night and Day 1946 Cary Grant actor Cole Porter 1
757366 Notorious 1946 Cary Grant actor Devlin 1
757383 The Bachelor and the Bobby-Soxer 1947 Cary Grant actor Dick 1
757384 The Bishop's Wife 1947 Cary Grant actor Dudley 1
757359 Mr. Blandings Builds His Dream House 1948 Cary Grant actor Jim Blandings 1
757339 Every Girl Should Be Married 1948 Cary Grant actor Dr. Madison Brown 1
757348 I Was a Male War Bride 1949 Cary Grant actor Capt. Henri Rochard 1

In [32]:
# Count the 1950s cast rows whose type is 'actor'.

in_fifties = cast['year'] // 10 == 195
is_actor = cast['type'] == 'actor'
len(cast[in_fifties & is_actor])


Out[32]:
146164

In [33]:
# Count the 1950s cast rows whose type is 'actress'.

in_fifties = cast['year'] // 10 == 195
is_actress = cast['type'] == 'actress'
len(cast[in_fifties & is_actress])


Out[33]:
53430

In [34]:
# Count the leading roles (n == 1) in films released in or before 1980.

through_1980 = cast['year'] <= 1980
is_lead = cast['n'] == 1
len(cast[through_1980 & is_lead])


Out[34]:
61005

In [35]:
# How many non-leading roles were available
# from the beginning of film history through 1980?

through_1980 = cast['year'] <= 1980
# Rows with a missing "n" are included here, because NaN != 1 is True.
not_lead = cast['n'] != 1
len(cast[through_1980 & not_lead])


Out[35]:
1037396

In [36]:
# Count the roles in films released in or before 1980 that have no
# numeric "n" rank (the value is missing).

through_1980 = cast['year'] <= 1980
unranked = cast['n'].isna()
len(cast[through_1980 & unranked])


Out[36]:
409437

In [ ]: