notebook.community

Edit and run



In [1]:

    
%matplotlib inline
import pandas as pd



In [2]:

    
from IPython.display import HTML  # public import location for the HTML display class

# Inline the two local stylesheets into a single <style> element so the
# notebook's tables and cells are rendered with the custom CSS.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    # `with` closes each file as soon as it has been read, instead of
    # leaving the handle open until the garbage collector gets to it.
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Last expression of the cell: the HTML object is displayed as the output.
HTML('<style>{}</style>'.format(css))









    Out[2]:



In [3]:

    
# Load the film titles table (columns: title, year).
# `DataFrame.from_csv` was deprecated and later removed from pandas;
# `pd.read_csv` is its replacement. index_col=None keeps the default
# integer index rather than promoting the first column to the index.
titles = pd.read_csv('data/titles.csv', index_col=None)
titles.head()









    Out[3]:






  
    
      
      title
      year
    
  
  
    
      0
      Ligaw na daigdig
      1962
    
    
      1
      Sluby ulanskie
      1934
    
    
      2
      The House of the Seven Gables
      1940
    
    
      3
      Mandala - Il simbolo
      2008
    
    
      4
      Shi bian
      1958



In [4]:

    
# Load the cast table (columns: title, year, name, type, character, n).
# `DataFrame.from_csv` was deprecated and later removed from pandas;
# `pd.read_csv` is its replacement. index_col=None keeps the default
# integer index rather than promoting the first column to the index.
cast = pd.read_csv('data/cast.csv', index_col=None)
cast.head()









    Out[4]:






  
    
      
      title
      year
      name
      type
      character
      n
    
  
  
    
      0
      Suuri illusioni
      1985
      Homo $
      actor
      Guests
      22
    
    
      1
      Gangsta Rap: The Glockumentary
      2007
      Too $hort
      actor
      Himself
      NaN
    
    
      2
      Menace II Society
      1993
      Too $hort
      actor
      Lew-Loc
      27
    
    
      3
      Porndogs: The Adventures of Sadie
      2009
      Too $hort
      actor
      Bosco
      3
    
    
      4
      Stop Pepper Palmer
      2014
      Too $hort
      actor
      Himself
      NaN



In [ ]:



In [5]:

    
# A "Superman year" is a year whose films feature more Superman
# characters than Batman characters.
# How many years in film history have been Superman years?

# Keep only the roles whose character is exactly 'Superman' or 'Batman'.
hero_roles = cast[cast.character.isin(['Superman', 'Batman'])]

# Count roles per (year, character), pivot the characters into columns,
# and treat a missing year/character combination as zero roles.
c = (
    hero_roles
    .groupby(['year', 'character'])
    .size()
    .unstack()
    .fillna(0)
)
c.head()



In [6]:

    
# Per-year surplus of Superman roles over Batman roles
# (positive => Superman year, negative => Batman year).
d = c['Superman'] - c['Batman']

superman_years = d[d > 0.0]
print('Superman years:')
print(len(superman_years))









    



Superman years:
13



In [7]:

    
# A "Batman year" has more Batman characters than Superman characters,
# i.e. a negative Superman-minus-Batman difference.
# How many such years have there been?

batman_years = d[d < 0.0]
print('Batman years:')
print(len(batman_years))









    



Batman years:
24



In [8]:

    
# Line plot over the history of film:
# one line for the number of actor roles each year,
# one line for the number of actress roles each year.

# Count roles per (year, type) and pivot `type` into columns.
c = (
    cast
    .groupby(['year', 'type'])
    .size()
    .unstack('type')
)
c.plot()









    Out[8]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f90845d4748>



In [9]:

    
# Same yearly actor / actress role counts as a line plot would show,
# but drawn as a kind='area' plot.

# Count roles per (year, type) and pivot `type` into columns.
c = (
    cast
    .groupby(['year', 'type'])
    .size()
    .unstack('type')
)
c.plot(kind='area')









    Out[9]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f9099ef7dd8>



In [10]:

    
# Plot the difference between the number of actor roles each year
# and the number of actress roles each year over the history of film.

# Count roles per (year, type) and pivot `type` into columns.
# A year that has roles of only one type leaves the other column NaN
# after unstacking; fill with 0 so the difference for that year is the
# real count rather than NaN (which would drop the year from the plot).
c = (
    cast
    .groupby(['year', 'type'])
    .size()
    .unstack('type')
    .fillna(0)
)
(c['actor'] - c['actress']).plot()









    Out[10]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f9099e62fd0>



In [11]:

    
# Plot the fraction of roles that have been 'actor' roles
# each year in the history of film.

# Count roles per (year, type) and pivot `type` into columns.
# A year that has roles of only one type leaves the other column NaN
# after unstacking; fill with 0 so that year's fraction is 1.0 or 0.0
# instead of NaN. Every row has at least one role, so the denominator
# is never zero.
c = (
    cast
    .groupby(['year', 'type'])
    .size()
    .unstack('type')
    .fillna(0)
)
(c['actor'] / (c['actor'] + c['actress'])).plot(ylim=[0, 1])









    Out[11]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f9099df4128>



In [12]:

    
# Plot the fraction of supporting (n=2) roles
# that have been 'actor' roles
# each year in the history of film.

# Restrict to rank-2 roles, count them per (year, type) and pivot
# `type` into columns.
# A year whose n=2 roles are all of one type leaves the other column NaN
# after unstacking; fill with 0 so that year's fraction is 1.0 or 0.0
# instead of NaN. Every row has at least one role, so the denominator
# is never zero.
c = (
    cast[cast.n == 2]
    .groupby(['year', 'type'])
    .size()
    .unstack('type')
    .fillna(0)
)
(c['actor'] / (c['actor'] + c['actress'])).plot(ylim=[0, 1])









    Out[12]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f9099d54860>



In [13]:

    
# Build a plot with a line for each rank n=1 through n=3,
# where the line shows what fraction of that rank's roles
# were 'actor' roles for each year in the history of film.

# Keep roles with n <= 3 (rows with a missing n fail the comparison and
# are excluded), count them per (year, type, n) and pivot `type` into
# columns, leaving one row per (year, n).
# A (year, n) pair with roles of only one type leaves the other column
# NaN after unstacking; fill with 0 so its fraction is 1.0 or 0.0
# instead of NaN. Every row has at least one role, so the denominator
# is never zero.
c = (
    cast[cast.n <= 3]
    .groupby(['year', 'type', 'n'])
    .size()
    .unstack('type')
    .fillna(0)
)

# Fraction of 'actor' roles per (year, n), then one column per rank so
# each rank is drawn as its own line. A rank with no roles at all in a
# given year stays NaN here, since the fraction is undefined.
r = c['actor'] / (c['actor'] + c['actress'])
r = r.unstack('n')
r.plot(ylim=[0, 1])









    Out[13]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f9099cc54e0>



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	title	year
0	Ligaw na daigdig	1962
1	Sluby ulanskie	1934
2	The House of the Seven Gables	1940
3	Mandala - Il simbolo	2008
4	Shi bian	1958

	title	year	name	type	character	n
0	Suuri illusioni	1985	Homo $	actor	Guests	22
1	Gangsta Rap: The Glockumentary	2007	Too $hort	actor	Himself	NaN
2	Menace II Society	1993	Too $hort	actor	Lew-Loc	27
3	Porndogs: The Adventures of Sadie	2009	Too $hort	actor	Bosco	3
4	Stop Pepper Palmer	2014	Too $hort	actor	Himself	NaN