In [5]:
%matplotlib inline
import pandas as pd
In [6]:
from IPython.display import HTML

# Concatenate the two local stylesheets and inject them into the notebook.
# Each file is read inside a `with` block so its handle is closed right away
# instead of being left for the garbage collector.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)
# Last expression of the cell: the HTML object is what gets rendered.
HTML('<style>{}</style>'.format(css))
Out[6]:
In [7]:
# Load the titles table. `DataFrame.from_csv` was deprecated and later removed
# from pandas; `read_csv` is the supported reader. `index_col=None` keeps every
# CSV column as data and leaves the default integer index, as before.
titles = pd.read_csv('data/titles.csv', index_col=None)
titles.head()
Out[7]:
In [8]:
# Load the cast table with `read_csv`; `DataFrame.from_csv` was deprecated and
# later removed from pandas. `index_col=None` keeps the default integer index.
cast = pd.read_csv('data/cast.csv', index_col=None)
In [49]:
# Preview the first five rows of the cast table.
cast.head(n=5)
Out[49]:
In [11]:
# Non-null value tally for each column of `titles` (counted down the rows).
titles.count(axis=0)
Out[11]:
212811
In [16]:
# First five rows of `titles` in ascending `year` order.
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
titles.sort_values('year').head()
Out[16]:
Reproduction of the Corbett and Fitzimmons Fight, Miss Jerry
In [21]:
# Per-column non-null tally over the rows whose title is exactly 'Hamlet'.
t = titles
is_hamlet = t['title'] == 'Hamlet'
t[is_hamlet].count()
Out[21]:
19
In [23]:
# Show every `titles` row whose title is exactly "North by Northwest".
t = titles
t.loc[t['title'] == "North by Northwest"]
Out[23]:
1
In [26]:
# First five 'Hamlet' rows in ascending `year` order.
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
t = titles
t[t.title == 'Hamlet'].sort_values('year').head()
Out[26]:
1910
In [30]:
# All "Treasure Island" rows, ordered from the lowest `year` to the highest.
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
t = titles
t[t.title == "Treasure Island"].sort_values('year')
Out[30]:
In [ ]:
In [32]:
# Per-column non-null tally over the rows whose `year` equals 1950.
t = titles
released_1950 = t['year'] == 1950
t[released_1950].count()
Out[32]:
1033
In [36]:
# Per-column non-null tally over the rows whose `year` equals 1960.
t = titles
released_1960 = t['year'] == 1960
t[released_1960].count()
Out[36]:
1423
In [41]:
# Per-column non-null tally over rows with 1950 <= year <= 1959.
# `Series.between` includes both endpoints by default.
t = titles
in_1950s = t['year'].between(1950, 1959)
t[in_1950s].count()
Out[41]:
12051
In [44]:
# Show every `titles` row whose title is exactly 'Batman'.
t = titles
t.loc[t['title'] == 'Batman']
Out[44]:
In [ ]:
In [103]:
# Number of `cast` rows for the title 'Inception'.
# `c` ends up holding that integer, not a DataFrame.
c = len(cast[cast['title'] == 'Inception'])
c
Out[103]:
72
In [104]:
# 'Inception' cast rows whose `n` value is missing.
c = cast[(cast['title'] == 'Inception') & cast['n'].isnull()]
len(c)
Out[104]:
21
In [95]:
# 'Inception' cast rows that do have an `n` value.
c = cast[(cast['title'] == 'Inception') & cast['n'].notnull()]
len(c)
Out[95]:
51
In [110]:
# "North by Northwest" cast rows that have an `n` value, ordered by ascending `n`.
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
c = cast
c = c[c.title == "North by Northwest"]
c = c[c.n.notnull()]
c.sort_values('n')
Out[110]:
In [ ]:
In [112]:
# Every cast row titled "Sleuth" (any year), ordered by ascending `n`.
# Rows with a missing `n` are placed last by default.
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
c = cast
c = c[c.title == "Sleuth"]
c.sort_values(['n'])
Out[112]:
In [ ]:
In [115]:
# Cast rows for 'Sleuth' restricted to year 2007, ordered by ascending `n`.
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
c = cast
c = c[(c.title == 'Sleuth') & (c.year == 2007)]
c.sort_values(['n'])
Out[115]:
In [ ]:
In [118]:
# Number of cast rows for 'Hamlet' with year 1921.
# `len` of the `n` column counts every row, including missing `n` values.
c = cast[(cast['title'] == 'Hamlet') & (cast['year'] == 1921)]
len(c['n'])
Out[118]:
9
In [119]:
# Number of cast rows for 'Hamlet' with year 1996.
# `len` of the `n` column counts every row, including missing `n` values.
c = cast[(cast['title'] == 'Hamlet') & (cast['year'] == 1996)]
len(c['n'])
Out[119]:
55
In [122]:
# Number of cast rows whose `character` is exactly 'Hamlet'.
c = cast[cast['character'] == 'Hamlet']
len(c)
Out[122]:
81
In [123]:
# Number of cast rows whose `character` is exactly 'Ophelia'.
c = cast[cast['character'] == 'Ophelia']
len(c)
Out[123]:
96
In [125]:
# Number of cast rows whose `character` is exactly "The Dude".
c = cast[cast['character'] == "The Dude"]
len(c)
Out[125]:
16
In [126]:
# Number of cast rows whose `character` is exactly 'The Stranger'.
c = cast[cast['character'] == 'The Stranger']
len(c)
Out[126]:
190
In [127]:
# Number of cast rows whose `name` is exactly "Sidney Poitier".
c = cast[cast['name'] == "Sidney Poitier"]
len(c)
Out[127]:
43
In [6]:
# Number of cast rows whose `name` is exactly "Judi Dench".
c = cast[cast['name'] == "Judi Dench"]
len(c)
Out[6]:
51
In [15]:
# Cary Grant rows with 1940 <= year < 1950 and `n` equal to 2.
# The three conditions are combined into one mask instead of three
# successive filters; the selected rows are the same.
by_grant = cast['name'] == 'Cary Grant'
in_1940s = (cast['year'] >= 1940) & (cast['year'] < 1950)
n_is_two = cast['n'] == 2
c = cast[by_grant & in_1940s & n_is_two]
c
Out[15]:
In [ ]:
In [20]:
# Cary Grant rows with 1940 <= year < 1950, ordered by ascending `year`.
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
c = cast
c = c[c.name == 'Cary Grant']
c = c[(c.year >= 1940) & (c.year < 1950)]
c.sort_values('year')
Out[20]:
In [ ]:
In [26]:
# Number of cast rows with type 'actor' and 1950 <= year < 1960.
# `len` of the `n` column counts every row, including missing `n` values.
in_1950s = (cast['year'] >= 1950) & (cast['year'] < 1960)
c = cast[in_1950s & (cast['type'] == 'actor')]
len(c['n'])
Out[26]:
147404
In [30]:
# Number of cast rows with type 'actress'; this cell applies no year filter.
c = cast[cast['type'] == 'actress']
len(c['n'])
Out[30]:
106867
In [35]:
# Per-column non-null tally over cast rows with year <= 1980 and `n` equal to 1.
c = cast[(cast['year'] <= 1980) & (cast['n'] == 1)]
c.count()
Out[35]:
61285
In [36]:
# Per-column non-null tally over cast rows with year <= 1980 and `n` greater than 1.
# Rows with a missing `n` fail the `> 1` comparison and are excluded.
c = cast[(cast['year'] <= 1980) & (cast['n'] > 1)]
c.count()
Out[36]:
630932
In [43]:
# Number of cast rows, across all years, whose `n` value is missing.
c = cast[cast['n'].isnull()]
len(c)
Out[43]:
1229941
In [ ]: