In [1]:
%matplotlib inline
import pandas as pd
In [2]:
from IPython.display import HTML

# Inject the notebook's custom stylesheets into the page.
# Each file is read inside a `with` block so its handle is closed as soon as
# the contents have been read, instead of being left for the garbage collector.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())

# Concatenate in the same order as before: table styles, then notebook styles.
css = ''.join(css_parts)
HTML('<style>{}</style>'.format(css))
Out[2]:
In [3]:
# Read the titles table from the local data directory and preview
# its first five rows.
titles = pd.read_csv('data/titles.csv')
titles.head(n=5)
Out[3]:
In [4]:
# Read the cast table from the local data directory and preview
# its first five rows.
cast = pd.read_csv('data/cast.csv')
cast.head(n=5)
Out[4]:
In [ ]:
In [5]:
# Exercise: using groupby(), chart how many films were released
# in each decade across the history of cinema.
t = titles
decade = t.year // 10 * 10  # round each year down to a multiple of ten
films_per_decade = t.groupby(decade).size()
films_per_decade.plot(kind='bar')
Out[5]:
In [6]:
# Exercise: use groupby() to chart the number of "Hamlet" films per decade.
t = titles[titles.title == 'Hamlet']  # exact title match only
hamlet_decade = t.year // 10 * 10  # round each year down to a multiple of ten
hamlets_per_decade = t.groupby(hamlet_decade).size()
hamlets_per_decade.plot(kind='bar')
Out[6]:
In [7]:
# Exercise: for each year of the 1950s, count the leading (n=1) roles
# that were available to actors and to actresses.
in_fifties = cast.year // 10 == 195  # years 1950 through 1959
is_lead = cast.n == 1
c = cast[in_fifties & is_lead]
c.groupby(['year', 'type']).size()
Out[7]:
In [8]:
# Exercise: treating the 1950s as a single block, count the roles
# available to actors and to actresses at each "n" from 1 through 5.
fifties_mask = cast.year // 10 * 10 == 1950
top_five_mask = cast.n < 6  # same cut-off as the original: n strictly below 6
c = cast[fifties_mask & top_five_mask]
role_counts = c.groupby(['n', 'type']).size()
role_counts
Out[8]:
In [9]:
# Use groupby() to determine how many roles are listed
# for each of the Pink Panther movies.
#
# Rows sharing this title are grouped by release year, and the largest `n`
# in each group is reported as that film's role count. max() does not depend
# on row order, so the frame is not sorted before grouping.
# NOTE(review): max() skips missing `n` values, so any rows without an `n`
# are not reflected in this figure — confirm that is the intended count.
c = cast
c = c[c.title == 'The Pink Panther']
c = c.groupby(['year'])[['n']].max()
c
Out[9]:
In [10]:
# Exercise: list, ordered by year, every film in which
# Frank Oz has played more than one role.
c = cast[cast.name == 'Frank Oz']
# One entry per (year, title) pair holding the number of cast rows for it;
# groupby sorts its keys by default, which yields the by-year ordering.
g = c.groupby(['year', 'title']).size()
g.loc[g > 1]
Out[10]:
In [11]:
# List each of the characters that Frank Oz
# has portrayed at least twice.
c = cast
c = c[c.name == 'Frank Oz']
# Number of cast rows per character name.
g = c.groupby(['character']).size()
# Series.order() is no longer available in pandas; sort_values() is its
# replacement and likewise sorts the counts in ascending order.
g[g > 1].sort_values()
Out[11]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: