Pandas is Crazy



In [1]:

    
import pandas as pd



In [2]:

    
%%time
cast = pd.DataFrame.from_csv('../data/intro/cast.csv.gz', index_col=None)









    



Wall time: 8.27 s



In [ ]:



In [3]:

    
cast.head()









    Out[3]:






  
    
      
      title
      year
      name
      type
      character
      n
    
  
  
    
      0
      Suuri illusioni
      1985
      Homo $
      actor
      Guests
      22.0
    
    
      1
      Battle of the Sexes
      2017
      $hutter
      actor
      Bobby Riggs Fan
      10.0
    
    
      2
      Secret in Their Eyes
      2015
      $hutter
      actor
      2002 Dodger Fan
      NaN
    
    
      3
      Steve Jobs
      2015
      $hutter
      actor
      1988 Opera House Patron
      NaN
    
    
      4
      Straight Outta Compton
      2015
      $hutter
      actor
      Club Patron
      NaN



In [ ]:



In [4]:

    
titles = cast[['title', 'year']].drop_duplicates().reset_index(drop=True)
titles.head()









    Out[4]:






  
    
      
      title
      year
    
  
  
    
      0
      Suuri illusioni
      1985
    
    
      1
      Battle of the Sexes
      2017
    
    
      2
      Secret in Their Eyes
      2015
    
    
      3
      Steve Jobs
      2015
    
    
      4
      Straight Outta Compton
      2015



In [ ]:



In [5]:

    
# What is the name and year of the very first movie ever made?

titles.sort_values('year').head(1)









    Out[5]:






  
    
      
      title
      year
    
  
  
    
      130478
      Miss Jerry
      1894



In [ ]:



In [6]:

    
# How many years into the future does the IMDB database list movie titles?

titles.sort_values('year').tail(3)









    Out[6]:






  
    
      
      title
      year
    
  
  
    
      66884
      Clutch Control
      2022
    
    
      165691
      Avatar 4
      2022
    
    
      34974
      New Rebellion
      2023



In [ ]:



In [7]:

    
# How many movies listed in `titles` came out in 1950?

len(titles[titles.year == 1950])

# or: (titles.year == 1950).sum()









    Out[7]:





901



In [ ]:



In [8]:

    
# What are the 15 most common movie titles in film history?

titles.title.value_counts().head(15)









    Out[8]:





Hamlet                       17
Macbeth                      15
Carmen                       13
The Outsider                 11
Maya                         11
She                          10
Temptation                   10
Anna Karenina                10
Othello                      10
The Three Musketeers          9
Vengeance                     9
Rage                          9
A Midsummer Night's Dream     9
Blood Money                   9
Bad Blood                     9
Name: title, dtype: int64



In [ ]:



In [9]:

    
# How many movies has Leonardo DiCaprio acted in?

leo = cast[cast.name == 'Leonardo DiCaprio']
leo









    Out[9]:






  
    
      
      title
      year
      name
      type
      character
      n
    
  
  
    
      553812
      Blood Diamond
      2006
      Leonardo DiCaprio
      actor
      Danny Archer
      1.0
    
    
      553813
      Body of Lies
      2008
      Leonardo DiCaprio
      actor
      Roger Ferris
      1.0
    
    
      553814
      Catch Me If You Can
      2002
      Leonardo DiCaprio
      actor
      Frank Abagnale Jr.
      1.0
    
    
      553815
      Celebrity
      1998
      Leonardo DiCaprio
      actor
      Brandon Darrow
      93.0
    
    
      553816
      Critters 3
      1991
      Leonardo DiCaprio
      actor
      Josh
      7.0
    
    
      553817
      Django Unchained
      2012
      Leonardo DiCaprio
      actor
      Calvin Candie
      3.0
    
    
      553818
      Don's Plum
      2001
      Leonardo DiCaprio
      actor
      Derek
      4.0
    
    
      553819
      Gangs of New York
      2002
      Leonardo DiCaprio
      actor
      Amsterdam Vallon
      1.0
    
    
      553820
      Inception
      2010
      Leonardo DiCaprio
      actor
      Cobb
      1.0
    
    
      553821
      J. Edgar
      2011
      Leonardo DiCaprio
      actor
      J. Edgar Hoover
      1.0
    
    
      553822
      Marvin's Room
      1996
      Leonardo DiCaprio
      actor
      Hank
      2.0
    
    
      553823
      Model Culture: One + Night in Bangkok
      2009
      Leonardo DiCaprio
      actor
      Himself
      NaN
    
    
      553824
      Poison Ivy
      1992
      Leonardo DiCaprio
      actor
      Guy
      9.0
    
    
      553825
      Revolutionary Road
      2008
      Leonardo DiCaprio
      actor
      Frank Wheeler
      2.0
    
    
      553826
      Romeo + Juliet
      1996
      Leonardo DiCaprio
      actor
      Romeo
      1.0
    
    
      553827
      Shutter Island
      2010
      Leonardo DiCaprio
      actor
      Teddy Daniels
      1.0
    
    
      553828
      The Aviator
      2004
      Leonardo DiCaprio
      actor
      Howard Hughes
      1.0
    
    
      553829
      The Basketball Diaries
      1995
      Leonardo DiCaprio
      actor
      Jim Carroll
      1.0
    
    
      553830
      The Beach
      2000
      Leonardo DiCaprio
      actor
      Richard
      1.0
    
    
      553831
      The Departed
      2006
      Leonardo DiCaprio
      actor
      Billy
      1.0
    
    
      553832
      The Great Gatsby
      2013
      Leonardo DiCaprio
      actor
      Jay Gatsby
      12.0
    
    
      553833
      The Man in the Iron Mask
      1998
      Leonardo DiCaprio
      actor
      King Louis XIV
      1.0
    
    
      553834
      The Man in the Iron Mask
      1998
      Leonardo DiCaprio
      actor
      Philippe
      1.0
    
    
      553835
      The Quick and the Dead
      1995
      Leonardo DiCaprio
      actor
      Kid
      4.0
    
    
      553836
      The Revenant
      2015
      Leonardo DiCaprio
      actor
      Hugh Glass
      1.0
    
    
      553837
      The Wolf of Wall Street
      2013
      Leonardo DiCaprio
      actor
      Jordan Belfort
      1.0
    
    
      553838
      This Boy's Life
      1993
      Leonardo DiCaprio
      actor
      Toby
      3.0
    
    
      553839
      Titanic
      1997
      Leonardo DiCaprio
      actor
      Jack Dawson
      1.0
    
    
      553840
      Total Eclipse
      1995
      Leonardo DiCaprio
      actor
      Arthur Rimbaud
      1.0
    
    
      553841
      What's Eating Gilbert Grape
      1993
      Leonardo DiCaprio
      actor
      Arnie Grape
      2.0



In [10]:

    
len(leo)









    Out[10]:





30



In [ ]:



In [11]:

    
# What movies has Leo acted in major roles (1-3)?

leo[cast.n <= 3]









    



C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:3: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  app.launch_new_instance()






    Out[11]:






  
    
      
      title
      year
      name
      type
      character
      n
    
  
  
    
      553812
      Blood Diamond
      2006
      Leonardo DiCaprio
      actor
      Danny Archer
      1.0
    
    
      553813
      Body of Lies
      2008
      Leonardo DiCaprio
      actor
      Roger Ferris
      1.0
    
    
      553814
      Catch Me If You Can
      2002
      Leonardo DiCaprio
      actor
      Frank Abagnale Jr.
      1.0
    
    
      553817
      Django Unchained
      2012
      Leonardo DiCaprio
      actor
      Calvin Candie
      3.0
    
    
      553819
      Gangs of New York
      2002
      Leonardo DiCaprio
      actor
      Amsterdam Vallon
      1.0
    
    
      553820
      Inception
      2010
      Leonardo DiCaprio
      actor
      Cobb
      1.0
    
    
      553821
      J. Edgar
      2011
      Leonardo DiCaprio
      actor
      J. Edgar Hoover
      1.0
    
    
      553822
      Marvin's Room
      1996
      Leonardo DiCaprio
      actor
      Hank
      2.0
    
    
      553825
      Revolutionary Road
      2008
      Leonardo DiCaprio
      actor
      Frank Wheeler
      2.0
    
    
      553826
      Romeo + Juliet
      1996
      Leonardo DiCaprio
      actor
      Romeo
      1.0
    
    
      553827
      Shutter Island
      2010
      Leonardo DiCaprio
      actor
      Teddy Daniels
      1.0
    
    
      553828
      The Aviator
      2004
      Leonardo DiCaprio
      actor
      Howard Hughes
      1.0
    
    
      553829
      The Basketball Diaries
      1995
      Leonardo DiCaprio
      actor
      Jim Carroll
      1.0
    
    
      553830
      The Beach
      2000
      Leonardo DiCaprio
      actor
      Richard
      1.0
    
    
      553831
      The Departed
      2006
      Leonardo DiCaprio
      actor
      Billy
      1.0
    
    
      553833
      The Man in the Iron Mask
      1998
      Leonardo DiCaprio
      actor
      King Louis XIV
      1.0
    
    
      553834
      The Man in the Iron Mask
      1998
      Leonardo DiCaprio
      actor
      Philippe
      1.0
    
    
      553836
      The Revenant
      2015
      Leonardo DiCaprio
      actor
      Hugh Glass
      1.0
    
    
      553837
      The Wolf of Wall Street
      2013
      Leonardo DiCaprio
      actor
      Jordan Belfort
      1.0
    
    
      553838
      This Boy's Life
      1993
      Leonardo DiCaprio
      actor
      Toby
      3.0
    
    
      553839
      Titanic
      1997
      Leonardo DiCaprio
      actor
      Jack Dawson
      1.0
    
    
      553840
      Total Eclipse
      1995
      Leonardo DiCaprio
      actor
      Arthur Rimbaud
      1.0
    
    
      553841
      What's Eating Gilbert Grape
      1993
      Leonardo DiCaprio
      actor
      Arnie Grape
      2.0



In [12]:

    
leo_major_roles = cast[(cast.name == 'Leonardo DiCaprio') & (cast.n <= 3)]



In [13]:

    
# And only in recent (>=2010) years?



In [14]:

    
leo_major_roles[cast.year >= 2010].sort_values('year')









    



C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':






    Out[14]:






  
    
      
      title
      year
      name
      type
      character
      n
    
  
  
    
      553820
      Inception
      2010
      Leonardo DiCaprio
      actor
      Cobb
      1.0
    
    
      553827
      Shutter Island
      2010
      Leonardo DiCaprio
      actor
      Teddy Daniels
      1.0
    
    
      553821
      J. Edgar
      2011
      Leonardo DiCaprio
      actor
      J. Edgar Hoover
      1.0
    
    
      553817
      Django Unchained
      2012
      Leonardo DiCaprio
      actor
      Calvin Candie
      3.0
    
    
      553837
      The Wolf of Wall Street
      2013
      Leonardo DiCaprio
      actor
      Jordan Belfort
      1.0
    
    
      553836
      The Revenant
      2015
      Leonardo DiCaprio
      actor
      Hugh Glass
      1.0



In [ ]:



In [15]:

    
# What are the 5 longest movie titles ever?

pd.set_option('max_colwidth', 300)

t = titles.copy()
t['len'] = t.title.str.len()
t = t.sort_values('len', ascending=False)
t.head()









    Out[15]:






  
    
      
      title
      year
      len
    
  
  
    
      174310
      Night of the Day of the Dawn of the Son of the Bride of the Return of the Revenge of the Terror of the Attack of the Evil Mutant Hellbound Flesh Eating Crawling Alien Zombified Subhumanoid Living Dead, Part 5
      2011
      208
    
    
      180077
      Night of the Day of the Dawn of the Son of the Bride of the Return of the Revenge of the Terror of the Attack of the Evil, Mutant, Hellbound, Flesh-Eating Subhumanoid Zombified Living Dead, Part 3
      2005
      196
    
    
      12993
      Brigitte, Laura, Ursula, Monica, Raquel, Litz, Florinda, Barbara, Claudia, e Sofia le chiamo tutte... anima mia
      1974
      111
    
    
      150746
      Film d'amore e d'anarchia, ovvero 'stamattina alle 10 in via dei Fiori nella nota casa di tolleranza...'
      1973
      104
    
    
      25811
      Those Magnificent Men in Their Flying Machines or How I Flew from London to Paris in 25 hours 11 minutes
      1965
      104



In [ ]:



In [16]:

    
# Plot the number of films that have been released each decade over the history of cinema.

%matplotlib inline



In [17]:

    
t = titles
(t.year // 10 * 10).value_counts().sort_index().plot(kind='bar')









    Out[17]:





<matplotlib.axes._subplots.AxesSubplot at 0x2808d278>



In [ ]:



In [ ]:

	title	year	name	type	character	n
0	Suuri illusioni	1985	Homo $	actor	Guests	22.0
1	Battle of the Sexes	2017	$hutter	actor	Bobby Riggs Fan	10.0
2	Secret in Their Eyes	2015	$hutter	actor	2002 Dodger Fan	NaN
3	Steve Jobs	2015	$hutter	actor	1988 Opera House Patron	NaN
4	Straight Outta Compton	2015	$hutter	actor	Club Patron	NaN

	title	year
66884	Clutch Control	2022
165691	Avatar 4	2022
34974	New Rebellion	2023

	title	year	name	type	character	n
553812	Blood Diamond	2006	Leonardo DiCaprio	actor	Danny Archer	1.0
553813	Body of Lies	2008	Leonardo DiCaprio	actor	Roger Ferris	1.0
553814	Catch Me If You Can	2002	Leonardo DiCaprio	actor	Frank Abagnale Jr.	1.0
553815	Celebrity	1998	Leonardo DiCaprio	actor	Brandon Darrow	93.0
553816	Critters 3	1991	Leonardo DiCaprio	actor	Josh	7.0
553817	Django Unchained	2012	Leonardo DiCaprio	actor	Calvin Candie	3.0
553818	Don's Plum	2001	Leonardo DiCaprio	actor	Derek	4.0
553819	Gangs of New York	2002	Leonardo DiCaprio	actor	Amsterdam Vallon	1.0
553820	Inception	2010	Leonardo DiCaprio	actor	Cobb	1.0
553821	J. Edgar	2011	Leonardo DiCaprio	actor	J. Edgar Hoover	1.0
553822	Marvin's Room	1996	Leonardo DiCaprio	actor	Hank	2.0
553823	Model Culture: One + Night in Bangkok	2009	Leonardo DiCaprio	actor	Himself	NaN
553824	Poison Ivy	1992	Leonardo DiCaprio	actor	Guy	9.0
553825	Revolutionary Road	2008	Leonardo DiCaprio	actor	Frank Wheeler	2.0
553826	Romeo + Juliet	1996	Leonardo DiCaprio	actor	Romeo	1.0
553827	Shutter Island	2010	Leonardo DiCaprio	actor	Teddy Daniels	1.0
553828	The Aviator	2004	Leonardo DiCaprio	actor	Howard Hughes	1.0
553829	The Basketball Diaries	1995	Leonardo DiCaprio	actor	Jim Carroll	1.0
553830	The Beach	2000	Leonardo DiCaprio	actor	Richard	1.0
553831	The Departed	2006	Leonardo DiCaprio	actor	Billy	1.0
553832	The Great Gatsby	2013	Leonardo DiCaprio	actor	Jay Gatsby	12.0
553833	The Man in the Iron Mask	1998	Leonardo DiCaprio	actor	King Louis XIV	1.0
553834	The Man in the Iron Mask	1998	Leonardo DiCaprio	actor	Philippe	1.0
553835	The Quick and the Dead	1995	Leonardo DiCaprio	actor	Kid	4.0
553836	The Revenant	2015	Leonardo DiCaprio	actor	Hugh Glass	1.0
553837	The Wolf of Wall Street	2013	Leonardo DiCaprio	actor	Jordan Belfort	1.0
553838	This Boy's Life	1993	Leonardo DiCaprio	actor	Toby	3.0
553839	Titanic	1997	Leonardo DiCaprio	actor	Jack Dawson	1.0
553840	Total Eclipse	1995	Leonardo DiCaprio	actor	Arthur Rimbaud	1.0
553841	What's Eating Gilbert Grape	1993	Leonardo DiCaprio	actor	Arnie Grape	2.0

	title	year	len
174310	Night of the Day of the Dawn of the Son of the Bride of the Return of the Revenge of the Terror of the Attack of the Evil Mutant Hellbound Flesh Eating Crawling Alien Zombified Subhumanoid Living Dead, Part 5	2011	208
180077	Night of the Day of the Dawn of the Son of the Bride of the Return of the Revenge of the Terror of the Attack of the Evil, Mutant, Hellbound, Flesh-Eating Subhumanoid Zombified Living Dead, Part 3	2005	196
12993	Brigitte, Laura, Ursula, Monica, Raquel, Litz, Florinda, Barbara, Claudia, e Sofia le chiamo tutte... anima mia	1974	111
150746	Film d'amore e d'anarchia, ovvero 'stamattina alle 10 in via dei Fiori nella nota casa di tolleranza...'	1973	104
25811	Those Magnificent Men in Their Flying Machines or How I Flew from London to Paris in 25 hours 11 minutes	1965	104