In [1]:
%matplotlib inline
import pandas as pd
In [2]:
# Load the titles table from CSV.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0, so the
# cell now uses `pd.read_csv`. Its default `index_col=None` matches the old call:
# every CSV column stays a data column under a default integer index.
titles = pd.read_csv('../data/titles.csv')
titles.head()
Out[2]:
In [3]:
# Load the cast table from CSV.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0, so the
# cell now uses `pd.read_csv`. Its default `index_col=None` matches the old call:
# every CSV column stays a data column under a default integer index.
cast = pd.read_csv('../data/cast.csv')
cast.head()
Out[3]:
In [4]:
# Total number of rows in the titles table.
titles.shape[0]
Out[4]:
In [5]:
# The two rows with the smallest `year` values.
(
    titles
    .sort_values('year')
    .iloc[:2]
)
Out[5]:
In [6]:
# Number of titles rows whose title is exactly "Hamlet".
titles.loc[titles['title'] == "Hamlet"].shape[0]
Out[6]:
In [7]:
# Number of titles rows whose title is exactly "North by Northwest".
len(titles.loc[lambda df: df['title'] == "North by Northwest"])
Out[7]:
In [8]:
# Smallest `year` among the rows titled "Hamlet".
titles.loc[titles['title'] == "Hamlet", 'year'].min()
Out[8]:
In [9]:
# Every row titled "Treasure Island", ordered by ascending `year`.
(
    titles
    .loc[titles['title'] == "Treasure Island"]
    .sort_values('year')
)
Out[9]:
In [10]:
# Number of titles rows with year equal to 1950.
titles.loc[titles['year'].eq(1950)].shape[0]
Out[10]:
In [11]:
# Number of titles rows with year equal to 1960.
titles.loc[titles['year'].eq(1960)].shape[0]
Out[11]:
In [12]:
# Number of titles rows with 1950 <= year <= 1959 (`between` is inclusive on
# both ends by default).
titles.loc[titles['year'].between(1950, 1959)].shape[0]
Out[12]:
In [13]:
# All titles rows whose title is exactly "Batman".
titles.loc[titles['title'] == "Batman"]
Out[13]:
In [14]:
# Number of cast rows whose title is "Inception".
cast.loc[cast['title'] == "Inception"].shape[0]
Out[14]:
In [15]:
# Number of "Inception" cast rows whose `n` value is missing.
cast.loc[
    (cast['title'] == "Inception")
    & cast['n'].isnull()
].shape[0]
Out[15]:
In [16]:
# Number of "Inception" cast rows that do have an `n` value.
cast.loc[
    (cast['title'] == "Inception")
    & cast['n'].notnull()
].shape[0]
Out[16]:
In [17]:
# "North by Northwest" cast rows with a non-null `n`, ordered by ascending `n`.
(
    cast
    .loc[(cast['title'] == "North by Northwest") & cast['n'].notnull()]
    .sort_values('n')
)
Out[17]:
In [18]:
# Cast rows for the 1972 "Sleuth": keep only rows where `n` is present, then
# order them by ascending `n`.
(
    cast
    .loc[(cast['title'] == "Sleuth") & (cast['year'] == 1972)]
    .dropna(subset=['n'])
    .sort_values('n')
)
Out[18]:
In [19]:
# Cast rows for the 2007 "Sleuth": keep only rows where `n` is present, then
# order them by ascending `n`.
(
    cast
    .loc[(cast['title'] == "Sleuth") & (cast['year'] == 2007)]
    .dropna(subset=['n'])
    .sort_values('n')
)
Out[19]:
In [20]:
# Number of cast rows for the title "Hamlet" with year 1921.
cast.loc[
    (cast['title'] == "Hamlet")
    & (cast['year'] == 1921)
].shape[0]
Out[20]:
In [21]:
# Number of cast rows for the title "Hamlet" with year 1996.
cast.loc[
    (cast['title'] == "Hamlet")
    & (cast['year'] == 1996)
].shape[0]
Out[21]:
In [22]:
# Number of cast rows whose character is exactly "Hamlet".
cast.loc[cast['character'] == "Hamlet"].shape[0]
Out[22]:
In [23]:
# Number of cast rows whose character is exactly "Ophelia".
cast.loc[cast['character'] == "Ophelia"].shape[0]
Out[23]:
In [24]:
# Number of cast rows whose character is exactly "The Dude".
cast.loc[cast['character'] == "The Dude"].shape[0]
Out[24]:
In [25]:
# Number of cast rows whose character is exactly "The Stranger".
cast.loc[cast['character'] == "The Stranger"].shape[0]
Out[25]:
In [26]:
# Number of cast rows whose name is "Sidney Poitier".
cast.loc[cast['name'] == "Sidney Poitier"].shape[0]
Out[26]:
In [27]:
# Number of cast rows whose name is "Judi Dench".
cast.loc[cast['name'] == "Judi Dench"].shape[0]
Out[27]:
In [28]:
# Cary Grant rows where year // 10 == 194 (i.e. the 1940s) and n == 2,
# ordered by ascending year.
(
    cast
    .loc[
        cast['name'].eq('Cary Grant')
        & cast['year'].floordiv(10).eq(194)
        & cast['n'].eq(2)
    ]
    .sort_values('year')
)
Out[28]:
In [29]:
# Cary Grant rows where year // 10 == 194 (i.e. the 1940s) and n == 1,
# ordered by ascending year.
(
    cast
    .loc[
        cast['name'].eq('Cary Grant')
        & cast['year'].floordiv(10).eq(194)
        & cast['n'].eq(1)
    ]
    .sort_values('year')
)
Out[29]:
In [30]:
# Number of cast rows with type "actor" where year // 10 == 195 (the 1950s).
cast.loc[
    cast['year'].floordiv(10).eq(195)
    & cast['type'].eq("actor")
].shape[0]
Out[30]:
In [31]:
# Number of cast rows with type "actress" where year // 10 == 195 (the 1950s).
cast.loc[
    cast['year'].floordiv(10).eq(195)
    & cast['type'].eq("actress")
].shape[0]
Out[31]:
In [32]:
# Number of cast rows with year <= 1980 and n == 1.
cast.loc[
    cast['year'].le(1980)
    & cast['n'].eq(1)
].shape[0]
Out[32]:
In [33]:
# Number of cast rows with year <= 1980 and n != 1.
# Note: a missing `n` compares as "not equal" to 1, so rows with a null `n`
# are included in this count.
cast.loc[
    cast['year'].le(1980)
    & cast['n'].ne(1)
].shape[0]
Out[33]:
In [34]:
# Number of cast rows with year <= 1980 whose `n` value is missing.
cast.loc[
    cast['year'].le(1980)
    & cast['n'].isnull()
].shape[0]
Out[34]: