In [2]:
%matplotlib inline
import pandas as pd

In [3]:
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))


Out[3]:

In [4]:
titles = pd.DataFrame.from_csv('data/titles.csv', index_col=None)
titles.head()


Out[4]:
title year
0 Ligaw na daigdig 1962
1 Sluby ulanskie 1934
2 The House of the Seven Gables 1940
3 Mandala - Il simbolo 2008
4 Shi bian 1958

In [5]:
cast = pd.DataFrame.from_csv('data/cast.csv', index_col=None)
cast.head()


Out[5]:
title year name type character n
0 Suuri illusioni 1985 Homo $ actor Guests 22
1 Gangsta Rap: The Glockumentary 2007 Too $hort actor Himself NaN
2 Menace II Society 1993 Too $hort actor Lew-Loc 27
3 Porndogs: The Adventures of Sadie 2009 Too $hort actor Bosco 3
4 Stop Pepper Palmer 2014 Too $hort actor Himself NaN

How many movies are listed in the titles dataframe?


In [6]:
len(cast)


Out[6]:
3376758

In [ ]:

What are the earliest two films listed in the titles dataframe?


In [7]:
titles.sort('year').head(2)


/home/ubuntu/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[7]:
title year
160802 Miss Jerry 1894
106671 Reproduction of the Corbett and Jeffries Fight 1899

In [ ]:

How many movies have the title "Hamlet"?


In [8]:
len(titles[titles['title'] == "Hamlet"])


Out[8]:
19

In [ ]:

How many movies are titled "North by Northwest"?


In [9]:
len(titles[titles['title'] == "North by Northwest"])


Out[9]:
1

In [ ]:

When was the first movie titled "Hamlet" made?


In [10]:
titles[titles['title'] == "Hamlet"].sort("year").head(1)


/home/ubuntu/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[10]:
title year
206783 Hamlet 1910

In [ ]:

List all of the "Treasure Island" movies from earliest to most recent.


In [11]:
titles[titles.title == 'Treasure Island'].sort("year")


/home/ubuntu/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[11]:
title year
194782 Treasure Island 1918
128127 Treasure Island 1920
125990 Treasure Island 1934
126741 Treasure Island 1950
196371 Treasure Island 1972
131013 Treasure Island 1973
59742 Treasure Island 1985
158135 Treasure Island 1999

In [ ]:

How many movies were made in the year 1950?


In [12]:
len(titles[titles.year ==1950])


Out[12]:
1037

In [ ]:

How many movies were made in the year 1960?


In [13]:
len(titles[titles.year ==1960])


Out[13]:
1476

In [ ]:

How many movies were made from 1950 through 1959?


In [14]:
len(titles[(titles.year >=1950) & (titles.year <=1959)])
# t = titles
# len(t[(t.year >= 1950) & (t.year <= 1959)])


Out[14]:
12194

In [ ]:

In what years has a movie titled "Batman" been released?


In [15]:
titles[titles.title == "Batman"].year


Out[15]:
131911    1943
175261    1989
Name: year, dtype: int64

In [ ]:

How many roles were there in the movie "Inception"?


In [27]:
len(cast[cast["title"] == "Inception"])


Out[27]:
24

In [ ]:

How many roles in the movie "Inception" are NOT ranked by an "n" value?


In [26]:
len(cast[cast.n.isnull()][cast["title"] == "Inception"])


/home/ubuntu/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py:1997: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
Out[26]:
24

In [ ]:

But how many roles in the movie "Inception" did receive an "n" value?


In [33]:
len(cast[cast.n.notnull()][cast.title == "Inception"])


/home/ubuntu/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py:1997: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
Out[33]:
51

In [ ]:

Display the cast of "North by Northwest" in their correct "n"-value order, ignoring roles that did not earn a numeric "n" value.


In [34]:
cast[cast.title == "North by Northwest" ][cast.n.notnull()].sort('n')


/home/ubuntu/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py:1997: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
/home/ubuntu/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[34]:
title year name type character n
788141 North by Northwest 1959 Cary Grant actor Roger O. Thornhill 1
3144487 North by Northwest 1959 Eva Marie Saint actress Eve Kendall 2
1317332 North by Northwest 1959 James Mason actor Phillip Vandamm 3
2830414 North by Northwest 1959 Jessie Royce Landis actress Clara Thornhill 4
321472 North by Northwest 1959 Leo G. Carroll actor The Professor 5
2736948 North by Northwest 1959 Josephine Hutchinson actress Mrs. Townsend 6
1533280 North by Northwest 1959 Philip Ober actor Lester Townsend 7
1152309 North by Northwest 1959 Martin Landau actor Leonard 8
2210173 North by Northwest 1959 Adam Williams actor Valerian 9
1637573 North by Northwest 1959 Edward Platt actor Victor Larrabee 10
601900 North by Northwest 1959 Robert Ellenstein actor Licht 11
2073368 North by Northwest 1959 Les Tremayne actor Auctioneer 12
419261 North by Northwest 1959 Philip Coolidge actor Dr. Cross 13
1364285 North by Northwest 1959 Patrick McVey actor Sergeant Flamm 14
184752 North by Northwest 1959 Edward Binns actor Captain Junket 15
1251695 North by Northwest 1959 Ken Lynch actor Charley - Chicago Policeman 16

In [ ]:

Display the entire cast, in "n"-order, of the 1972 film "Sleuth".


In [36]:
cast[(cast.title == "Sleuth") & (cast.year == 1972)][cast.n.notnull()].sort("n")


/home/ubuntu/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py:1997: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
/home/ubuntu/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[36]:
title year name type character n
1542273 Sleuth 1972 Laurence Olivier actor Andrew Wyke 1
294241 Sleuth 1972 Michael Caine actor Milo Tindle 2
337099 Sleuth 1972 Alec Cawthorne actor Inspector Doppler 3
1324763 Sleuth 1972 John (II) Matthews actor Detective Sergeant Tarrant 4
2453907 Sleuth 1972 Eve (III) Channing actress Marguerite Wyke 5
1309798 Sleuth 1972 Teddy Martin actor Police Constable Higgs 6

In [ ]:

Now display the entire cast, in "n"-order, of the 2007 version of "Sleuth".


In [37]:
cast[(cast.title == "Sleuth") & (cast.year == 2007)][cast.n.notnull()].sort("n")


/home/ubuntu/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py:1997: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
/home/ubuntu/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[37]:
title year name type character n
294242 Sleuth 2007 Michael Caine actor Andrew 1
1169098 Sleuth 2007 Jude Law actor Milo 2
1632687 Sleuth 2007 Harold Pinter actor Man on T.V. 3

In [ ]:

How many roles were credited in the silent 1921 version of Hamlet?


In [38]:
len(cast[(cast.title == "Hamlet") & (cast.year == 1921)][cast.n.notnull()])


/home/ubuntu/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py:1997: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
Out[38]:
9

In [ ]:

How many roles were credited in Branagh’s 1996 Hamlet?


In [41]:
len(cast[(cast.title == "Hamlet") & (cast.year == 1996)][cast.n.notnull()])


/home/ubuntu/miniconda3/lib/python3.5/site-packages/pandas/core/frame.py:1997: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
Out[41]:
46

In [ ]:

How many "Hamlet" roles have been listed in all film credits through history?


In [45]:
len(cast[cast.character == "Hamlet"])


Out[45]:
84

In [ ]:

How many people have played an "Ophelia"?


In [44]:
len(cast[cast.character == "Ophelia"] )


Out[44]:
99

In [ ]:

How many people have played a role called "The Dude"?


In [46]:
len(cast[cast.character == "The Dude"] )


Out[46]:
17

In [ ]:

How many people have played a role called "The Stranger"?


In [47]:
len(cast[cast.character == "The Stranger"] )


Out[47]:
191

In [ ]:

How many roles has Sidney Poitier played throughout his career?


In [52]:
c = cast
c = c[c.name == 'Sidney Poitier']
len(c)


Out[52]:
43

In [ ]:

How many roles has Judi Dench played?


In [53]:
c = cast
c = c[c.name == 'Judi Dench']
len(c)


Out[53]:
53

In [ ]:

List the supporting roles (having n=2) played by Cary Grant in the 1940s, in order by year.


In [55]:
c = cast
c = c[(c.name == 'Cary Grant') & (c.n == 2) &(c.year >=1940) & (c.year<1950)].sort("year")
c


/home/ubuntu/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:3: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  app.launch_new_instance()
Out[55]:
title year name type character n
788138 My Favorite Wife 1940 Cary Grant actor Nick 2
788148 Penny Serenade 1941 Cary Grant actor Roger Adams 2

In [ ]:

List the leading roles that Cary Grant played in the 1940s in order by year.


In [57]:
c = cast
c = c[(c.name == 'Cary Grant') & (c.n == 1) &(c.year >=1940) & (c.year<1950)].sort("year")
c


/home/ubuntu/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  from ipykernel import kernelapp as app
Out[57]:
title year name type character n
788163 The Howards of Virginia 1940 Cary Grant actor Matt Howard 1
788120 His Girl Friday 1940 Cary Grant actor Walter Burns 1
788165 The Philadelphia Story 1940 Cary Grant actor C. K. Dexter Haven 1
788153 Suspicion 1941 Cary Grant actor Johnnie 1
788167 The Talk of the Town 1942 Cary Grant actor Leopold Dilg 1
788144 Once Upon a Honeymoon 1942 Cary Grant actor Patrick 'Pat' O'Toole 1
788111 Destination Tokyo 1943 Cary Grant actor Capt. Cassidy 1
788136 Mr. Lucky 1943 Cary Grant actor Joe Adams 1
788137 Mr. Lucky 1943 Cary Grant actor Joe Bascopolous 1
788145 Once Upon a Time 1944 Cary Grant actor Jerry Flynn 1
788103 Arsenic and Old Lace 1944 Cary Grant actor Mortimer Brewster 1
788140 None But the Lonely Heart 1944 Cary Grant actor Ernie Mott 1
788139 Night and Day 1946 Cary Grant actor Cole Porter 1
788142 Notorious 1946 Cary Grant actor Devlin 1
788159 The Bachelor and the Bobby-Soxer 1947 Cary Grant actor Dick Nugent 1
788160 The Bishop's Wife 1947 Cary Grant actor Dudley 1
788135 Mr. Blandings Builds His Dream House 1948 Cary Grant actor Jim Blandings 1
788115 Every Girl Should Be Married 1948 Cary Grant actor Dr. Madison Brown 1
788124 I Was a Male War Bride 1949 Cary Grant actor Capt. Henri Rochard 1

In [ ]:

How many roles were available for actors in the 1950s?


In [ ]:
c = cast
c = c[(c.name == 'Cary Grant') & (c.n == 1) &(c.year >=1940) & (c.year<1950)].sort("year")
c

In [ ]:

How many roles were avilable for actresses in the 1950s?


In [59]:
len(cast[(cast.type == "actress") &(cast.year >=1940) & (cast.year <1950)])


Out[59]:
50210

In [ ]:

How many leading roles (n=1) were available from the beginning of film history through 1980?


In [60]:
len(cast[(cast.n==1)&(cast.year <=1980)])


Out[60]:
62203

In [ ]:

How many non-leading roles were available through from the beginning of film history through 1980?


In [61]:
len(cast[(cast.n!=1)&(cast.year <=1980)])


Out[61]:
1064947

In [ ]:

How many roles through 1980 were minor enough that they did not warrant a numeric "n" rank?


In [64]:
len(cast[(cast.n.isnull())&(cast.year <=1980)])


Out[64]:
424288

In [ ]: