In [1]:
%matplotlib inline
import pandas as pd

In [2]:
from IPython.display import HTML

# Read both local stylesheets and inject them into the notebook as one
# <style> element. Each file is opened in a `with` block so its handle is
# closed as soon as the text has been read, instead of being left open.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Last expression of the cell: the HTML object is what the notebook renders.
HTML('<style>{}</style>'.format(css))


Out[2]:

In [3]:
# Load the film titles table from CSV and preview its first five rows.
titles = pd.read_csv('data/titles.csv')
titles.head(n=5)


Out[3]:
title year
0 Tomorrow Ends at Dawn 2002
1 Brothers of the West 1937
2 Nemo 1984
3 Pereezd 2014
4 Bad for Business 2007

In [4]:
# Load the cast table from CSV and preview its first five rows.
cast = pd.read_csv('data/cast.csv')
cast.head(n=5)


Out[4]:
title year name type character n
0 Suuri illusioni 1985 Homo $ actor Guests 22
1 Gangsta Rap: The Glockumentary 2007 Too $hort actor Himself NaN
2 Menace II Society 1993 Too $hort actor Lew-Loc 27
3 Porndogs: The Adventures of Sadie 2009 Too $hort actor Bosco 3
4 Stop Pepper Palmer 2014 Too $hort actor Himself NaN

In [ ]:


In [5]:
# Exercise: using groupby(), draw a bar chart of how many films
# were released in each decade in the history of cinema.

t = titles
# Floor each release year to the start of its decade (e.g. 1937 -> 1930).
decade = t.year // 10 * 10
films_per_decade = t.groupby(decade).size()
films_per_decade.plot(kind='bar')


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa259c41978>

In [6]:
# Exercise: using groupby(), draw a bar chart of how many films
# titled exactly "Hamlet" were made in each decade.

t = titles.loc[titles.title == 'Hamlet']
# Floor each release year to the start of its decade (e.g. 1937 -> 1930).
decade = t.year // 10 * 10
hamlets_per_decade = t.groupby(decade).size()
hamlets_per_decade.plot(kind='bar')


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa26f513518>

In [7]:
# Exercise: for every year of the 1950s, count the leading (n=1) roles
# that went to actors and the ones that went to actresses.

c = cast
is_fifties = c.year // 10 == 195   # years 1950 through 1959
is_lead = c.n == 1                 # rows with a missing n compare False
c = c[is_fifties & is_lead]
c.groupby(['year', 'type']).size()


Out[7]:
year  type   
1950  actor      603
      actress    267
1951  actor      626
      actress    272
1952  actor      590
      actress    282
1953  actor      629
      actress    283
1954  actor      620
      actress    298
1955  actor      602
      actress    263
1956  actor      609
      actress    284
1957  actor      703
      actress    281
1958  actor      695
      actress    273
1959  actor      672
      actress    286
dtype: int64

In [8]:
# Exercise: over the 1950s as a whole, count the roles available to
# actors and to actresses for each "n" number 1 through 5.

c = cast
in_1950s = c.year // 10 * 10 == 1950
# NOTE(review): `n < 6` has no lower bound, so this matches "1 through 5"
# only if n never drops below 1 -- confirm against the data.
top_five = c.n < 6
c = c[in_1950s & top_five]
c.groupby(['n', 'type']).size()


Out[8]:
n  type   
1  actor      6349
   actress    2789
2  actor      4354
   actress    4375
3  actor      5370
   actress    3005
4  actor      5370
   actress    2735
5  actor      5403
   actress    2426
dtype: int64

In [9]:
# Use groupby() to determine how many roles are listed
# for each of the Pink Panther movies.
#
# The films are told apart by release year, and the role count is taken
# as the highest "n" value recorded for that year. Rows whose n is
# missing are skipped by max(), so they do not contribute to the result.
# NOTE(review): this equals the number of listed roles only if n runs
# 1..N without gaps for each film -- confirm, or use .size() for a row count.

c = cast
c = c[c.title == 'The Pink Panther']
# max() does not depend on row order, so no sort is needed before grouping.
c = c.groupby(['year'])[['n']].max()
c


Out[9]:
n
year
1963 15
2006 50

In [10]:
# Exercise: list, ordered by year, every film in which Frank Oz
# is credited with more than one role.

c = cast.loc[cast.name == 'Frank Oz']
# One entry per (year, title) pair; the value is the number of cast rows.
g = c.groupby(['year', 'title']).size()
g[g.gt(1)]


Out[10]:
year  title                                   
1979  The Muppet Movie                            8
1981  An American Werewolf in London              2
      The Great Muppet Caper                      6
1982  The Dark Crystal                            2
1984  The Muppets Take Manhattan                  7
1985  Sesame Street Presents: Follow that Bird    3
1992  The Muppet Christmas Carol                  7
1996  Muppet Treasure Island                      4
1999  Muppets from Space                          4
      The Adventures of Elmo in Grouchland        3
dtype: int64

In [11]:
# List each of the characters that Frank Oz
# has portrayed at least twice.

c = cast
c = c[c.name == 'Frank Oz']
# Number of cast rows per character name.
g = c.groupby(['character']).size()
# Series.order() was removed from pandas; sort_values() is its replacement
# and likewise sorts ascending by the counts.
g[g > 1].sort_values()


Out[11]:
character
Grover            2
Bert              3
Cookie Monster    3
Fozzie Bear       4
Sam the Eagle     5
Yoda              5
Animal            6
Miss Piggy        6
dtype: int64

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: