notebook.community

Edit and run



In [1]:

    
%matplotlib inline
import pandas as pd



In [2]:

    
from IPython.display import HTML

# Read both stylesheets through context managers so each file handle is
# closed as soon as its contents have been read, then inject the combined
# CSS into the notebook as a single <style> element.
css = ''
for stylesheet in ('style-table.css', 'style-notebook.css'):
    with open(stylesheet) as handle:
        css += handle.read()
HTML('<style>{}</style>'.format(css))









    Out[2]:



In [3]:

    
# Load the film catalogue (one row per title/year) and preview the first rows.
titles = pd.read_csv('data/titles.csv')
titles.head(n=5)









    Out[3]:






  
    
      
      title
      year
    
  
  
    
      0
      Tomorrow Ends at Dawn
      2002
    
    
      1
      Brothers of the West
      1937
    
    
      2
      Nemo
      1984
    
    
      3
      Pereezd
      2014
    
    
      4
      Bad for Business
      2007



In [4]:

    
# Load the cast listing (one row per credited role) and preview the first rows.
cast = pd.read_csv('data/cast.csv')
cast.head(n=5)









    Out[4]:






  
    
      
      title
      year
      name
      type
      character
      n
    
  
  
    
      0
      Suuri illusioni
      1985
      Homo $
      actor
      Guests
      22
    
    
      1
      Gangsta Rap: The Glockumentary
      2007
      Too $hort
      actor
      Himself
      NaN
    
    
      2
      Menace II Society
      1993
      Too $hort
      actor
      Lew-Loc
      27
    
    
      3
      Porndogs: The Adventures of Sadie
      2009
      Too $hort
      actor
      Bosco
      3
    
    
      4
      Stop Pepper Palmer
      2014
      Too $hort
      actor
      Himself
      NaN



In [ ]:



In [5]:

    
# Using groupby(), plot how many films were released
# in each decade of cinema history.

t = titles
# Integer-divide by 10 and multiply back to snap every year to its decade start.
films_per_decade = t.groupby(t['year'] // 10 * 10).size()
films_per_decade.plot.bar()









    Out[5]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fa259c41978>



In [6]:

    
# Use groupby() to plot how many films titled "Hamlet" were made each decade.

# Keep only exact title matches; `t` ends up bound to the filtered frame.
t = titles[titles['title'] == 'Hamlet']
# Snap each release year to the first year of its decade before counting.
hamlets_per_decade = t.groupby(t['year'] // 10 * 10).size()
hamlets_per_decade.plot.bar()









    Out[6]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fa26f513518>



In [7]:

    
# For every year of the 1950s, count the leading (n=1) roles
# that went to actors and to actresses.

# Build both row filters up front, then apply them together.
released_in_1950s = cast['year'] // 10 == 195
is_leading_role = cast['n'] == 1
c = cast[released_in_1950s & is_leading_role]
c.groupby(['year', 'type']).size()









    Out[7]:





year  type   
1950  actor      603
      actress    267
1951  actor      626
      actress    272
1952  actor      590
      actress    282
1953  actor      629
      actress    283
1954  actor      620
      actress    298
1955  actor      602
      actress    263
1956  actor      609
      actress    284
1957  actor      703
      actress    281
1958  actor      695
      actress    273
1959  actor      672
      actress    286
dtype: int64



In [8]:

    
# Treating the 1950s as one block, count the roles available to actors
# and to actresses at each "n" number 1 through 5.

# Rows with a missing n compare False against 6 and are therefore excluded.
in_fifties = cast['year'] // 10 * 10 == 1950
within_top_five = cast['n'] < 6
c = cast[in_fifties & within_top_five]
c.groupby(['n', 'type']).size()









    Out[8]:





n  type   
1  actor      6349
   actress    2789
2  actor      4354
   actress    4375
3  actor      5370
   actress    3005
4  actor      5370
   actress    2735
5  actor      5403
   actress    2426
dtype: int64



In [9]:

    
# Use groupby() to determine how many roles are listed
# for each of the Pink Panther movies.

c = cast
c = c[c.title == 'The Pink Panther']
# The largest "n" per release year is used as the role count. max() does not
# depend on row order, so the frame is not sorted first.
# NOTE(review): roles whose n is NaN (cast.head() shows some) are not reflected
# in this figure — confirm that max(n) rather than a row count is what is wanted.
c = c.groupby(['year'])[['n']].max()
c



In [10]:

    
# List, ordered by year, every film in which
# Frank Oz is credited with more than one role.

c = cast[cast['name'] == 'Frank Oz']
# One entry per (year, title) holding the number of roles in that film;
# groupby sorts its keys, which yields the chronological ordering.
g = c.groupby(['year', 'title']).size()
g[g.gt(1)]









    Out[10]:





year  title                                   
1979  The Muppet Movie                            8
1981  An American Werewolf in London              2
      The Great Muppet Caper                      6
1982  The Dark Crystal                            2
1984  The Muppets Take Manhattan                  7
1985  Sesame Street Presents: Follow that Bird    3
1992  The Muppet Christmas Carol                  7
1996  Muppet Treasure Island                      4
1999  Muppets from Space                          4
      The Adventures of Elmo in Grouchland        3
dtype: int64



In [11]:

    
# List each of the characters that Frank Oz
# has portrayed at least twice.

c = cast
c = c[c.name == 'Frank Oz']
# Number of credited appearances per character name.
g = c.groupby(['character']).size()
# Series.order() no longer exists in pandas; sort_values() is its replacement
# and likewise sorts ascending by default.
g[g > 1].sort_values()









    Out[11]:





character
Grover            2
Bert              3
Cookie Monster    3
Fozzie Bear       4
Sam the Eagle     5
Yoda              5
Animal            6
Miss Piggy        6
dtype: int64



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	n
year
1963	15
2006	50

	title	year
0	Tomorrow Ends at Dawn	2002
1	Brothers of the West	1937
2	Nemo	1984
3	Pereezd	2014
4	Bad for Business	2007

	title	year	name	type	character	n
0	Suuri illusioni	1985	Homo $	actor	Guests	22
1	Gangsta Rap: The Glockumentary	2007	Too $hort	actor	Himself	NaN
2	Menace II Society	1993	Too $hort	actor	Lew-Loc	27
3	Porndogs: The Adventures of Sadie	2009	Too $hort	actor	Bosco	3
4	Stop Pepper Palmer	2014	Too $hort	actor	Himself	NaN