In [1]:
%matplotlib inline
import pandas as pd
In [2]:
from IPython.display import HTML

# Inject the two local stylesheets into the notebook page.
# Each file is read inside a `with` block so its handle is closed right away
# instead of being left open until garbage collection.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Last expression of the cell: rendered by the notebook as a <style> element.
HTML('<style>{}</style>'.format(css))
Out[2]:
In [3]:
# Load the titles table from CSV.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `pd.read_csv` is the supported reader. `index_col=None` keeps the default
# integer index rather than promoting the first column to the index.
titles = pd.read_csv('data/titles.csv', index_col=None)
titles.head()
Out[3]:
In [4]:
# Load the cast table from CSV.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `pd.read_csv` is the supported reader. `index_col=None` keeps the default
# integer index rather than promoting the first column to the index.
cast = pd.read_csv('data/cast.csv', index_col=None)
cast.head()
Out[4]:
In [5]:
# Question: which ten movie names have been used most often?
# Tally every title, then keep the ten largest tallies.
title_counts = titles['title'].value_counts()
title_counts.head(10)
Out[5]:
In [6]:
# Question: which three years of the 1930s had the most film releases?
# Floor-dividing the year by 10 gives the decade prefix (193 for 1930-1939).
in_1930s = (titles['year'] // 10) == 193
releases_1930s = titles[in_1930s]
releases_1930s['year'].value_counts().head(3)
Out[6]:
In [7]:
# Bar chart of how many films were released in each decade
# across the whole titles table.
release_decade = titles['year'] // 10 * 10  # floor each year to its decade
films_per_decade = release_decade.value_counts().sort_index()
films_per_decade.plot(kind='bar')
Out[7]:
In [8]:
# Bar chart of how many films titled exactly "Hamlet" were made per decade.
hamlet_films = titles[titles['title'] == 'Hamlet']
hamlet_decade = hamlet_films['year'] // 10 * 10  # floor each year to its decade
hamlet_decade.value_counts().sort_index().plot(kind='bar')
Out[8]:
In [9]:
# Bar chart of how many cast rows have the character "Rustler",
# grouped by decade.
rustler_roles = cast[cast['character'] == 'Rustler']
rustler_decade = rustler_roles['year'] // 10 * 10  # floor each year to its decade
rustler_decade.value_counts().sort_index().plot(kind='bar')
Out[9]:
In [10]:
# Bar chart of how many cast rows have the character "Hamlet", per decade.
hamlet_roles = cast[cast['character'] == 'Hamlet']
hamlet_role_decade = hamlet_roles['year'] // 10 * 10  # floor each year to its decade
hamlet_role_decade.value_counts().sort_index().plot(kind='bar')
Out[10]:
In [11]:
# Question: which 11 character names occur most often in the cast table?
character_counts = cast['character'].value_counts()
character_counts.head(11)
Out[11]:
In [12]:
# Question: which 10 people have the most credits as "Herself"?
herself_rows = cast[cast['character'] == 'Herself']
herself_rows['name'].value_counts().head(10)
Out[12]:
In [13]:
# Question: which 10 people have the most credits as "Himself"?
himself_rows = cast[cast['character'] == 'Himself']
himself_rows['name'].value_counts().head(10)
Out[13]:
In [14]:
# Question: which performers have the most cast rows for the year 1945?
# Shows the ten most frequent names.
credits_1945 = cast[cast['year'] == 1945]
credits_1945['name'].value_counts().head(10)
Out[14]:
In [15]:
# Question: which performers have the most cast rows for the year 1985?
# Shows the ten most frequent names.
credits_1985 = cast[cast['year'] == 1985]
credits_1985['name'].value_counts().head(10)
Out[15]:
In [16]:
# Line plot of the number of Mammootty's roles in each year of his career.
mammootty_rows = cast[cast['name'] == 'Mammootty']
roles_per_year = mammootty_rows['year'].value_counts().sort_index()
roles_per_year.plot()
Out[16]:
In [17]:
# Question: which 10 roles starting with the phrase "Patron in" are most frequent?
# The prefix tested includes a trailing space, so only names where "in" is a
# complete word are matched.
is_patron_in = cast['character'].str.startswith('Patron in ')
patron_rows = cast[is_patron_in]
patron_rows['character'].value_counts().head(10)
Out[17]:
In [18]:
# Question: which 10 roles starting with the word "Science" are most frequent?
is_science = cast['character'].str.startswith('Science')
science_rows = cast[is_science]
science_rows['character'].value_counts().head(10)
Out[18]:
In [19]:
# Plot the n-values of the roles that Judi Dench has played over her career.
# `DataFrame.sort` was removed in pandas 0.20; `sort_values` is its
# replacement and orders the rows by the same column.
c = cast
c = c[c.name == 'Judi Dench'].sort_values('year')
c = c[c.n.notnull()]  # keep only rows that actually have an n value to plot
c.plot(x='year', y='n', kind='scatter')
Out[19]:
In [20]:
# Plot the n-values of Cary Grant's roles through his career.
# `DataFrame.sort` was removed in pandas 0.20; `sort_values` is its
# replacement and orders the rows by the same column.
c = cast
c = c[c.name == 'Cary Grant'].sort_values('year')
c = c[c.n.notnull()]  # keep only rows that actually have an n value to plot
c.plot(x='year', y='n', kind='scatter')
Out[20]:
In [21]:
# Plot the n-value of the roles that Sidney Poitier has acted
# over the years.
# `DataFrame.sort` was removed in pandas 0.20; `sort_values` is its
# replacement and orders the rows by the same column.
c = cast
c = c[c.name == 'Sidney Poitier'].sort_values('year')
c = c[c.n.notnull()]  # keep only rows that actually have an n value to plot
c.plot(x='year', y='n', kind='scatter')
Out[21]:
In [22]:
# Question: in the 1950s, how many leading (n=1) roles went to actors
# and how many to actresses?
# Floor-dividing the year by 10 gives the decade prefix (195 for 1950-1959).
in_1950s = (cast['year'] // 10) == 195
is_leading = cast['n'] == 1
leading_1950s = cast[in_1950s & is_leading]
leading_1950s['type'].value_counts()
Out[22]:
In [23]:
# Question: in the 1950s, how many supporting (n=2) roles went to actors
# and how many to actresses?
# Floor-dividing the year by 10 gives the decade prefix (195 for 1950-1959).
in_1950s = (cast['year'] // 10) == 195
is_supporting = cast['n'] == 2
supporting_1950s = cast[in_1950s & is_supporting]
supporting_1950s['type'].value_counts()
Out[23]:
In [ ]: