In [7]:
%matplotlib inline
import pandas as pd
In [8]:
from IPython.core.display import HTML
# Read both stylesheets through context managers so each file handle is
# closed as soon as its contents are read, rather than being left open
# until the garbage collector happens to reclaim it.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)
# Last expression of the cell: wrap the combined CSS in a <style> element
# so the notebook renders it.
HTML('<style>{}</style>'.format(css))
Out[8]:
In [9]:
# Load the titles table. `DataFrame.from_csv` has been removed from pandas;
# `pd.read_csv` is the supported reader. `index_col=None` keeps the default
# integer index, exactly as the original call requested.
titles = pd.read_csv('data/titles.csv', index_col=None)
# Preview the first rows to confirm the file loaded as expected.
titles.head()
Out[9]:
In [10]:
# Load the cast table. `DataFrame.from_csv` has been removed from pandas;
# `pd.read_csv` is the supported reader. `index_col=None` keeps the default
# integer index, exactly as the original call requested.
cast = pd.read_csv('data/cast.csv', index_col=None)
# Preview the first rows to confirm the file loaded as expected.
cast.head()
Out[10]:
In [13]:
# Show the last five rows of the titles table (five is the `tail` default).
titles.tail(n=5)
Out[13]:
In [14]:
# Total number of rows in the titles table.
titles.shape[0]
Out[14]:
In [35]:
# Show the two rows with the smallest `year` values.
# `DataFrame.sort(columns=...)` has been removed from pandas; `sort_values(by=...)`
# is its replacement. `.head(2)` replaces the redundant `.head()[:2]`.
titles.sort_values(by='year', ascending=True).head(2)
Out[35]:
In [ ]:
In [57]:
# All rows whose title contains the substring 'Hamlet', ordered by year.
# `na=False` treats a missing title as "no match" so the mask stays strictly
# boolean; `sort_values` replaces the removed `DataFrame.sort`.
titles[titles['title'].str.contains('Hamlet', na=False)].sort_values('year')
Out[57]:
In [ ]:
In [45]:
# Count the rows whose title is exactly 'North by Northwest'.
len(titles.loc[titles['title'] == 'North by Northwest'])
Out[45]:
In [ ]:
In [49]:
# Earliest row whose title is exactly 'Hamlet'.
# The original line was missing the `==` comparison (a syntax error) and used
# the removed `DataFrame.sort`; `sort_values` + `head(1)` gives the first match.
titles[titles['title'] == 'Hamlet'].sort_values('year').head(1)
Out[49]:
In [ ]:
In [53]:
# All rows titled exactly 'Treasure Island', ordered from earliest year to latest.
# `sort_values` replaces `DataFrame.sort`, which has been removed from pandas.
titles[titles['title'] == 'Treasure Island'].sort_values('year')
Out[53]:
In [ ]:
In [54]:
# Count the rows whose year is 1950.
len(titles.loc[titles['year'] == 1950])
Out[54]:
In [ ]:
In [55]:
# Keep only the rows whose year is 1960, then report how many there are.
movies_of_1960 = titles.loc[titles['year'] == 1960]
movies_of_1960.shape[0]
Out[55]:
In [ ]:
In [67]:
# Rows whose year falls in the inclusive range 1950..1959.
# The original upper bound was `<= 1950`, which collapsed the range to the
# single year 1950 and did not match what the variable name describes.
moviesOf1950And1959 = titles[(titles.year >= 1950) & (titles.year <= 1959)]
# Number of rows in that ten-year span.
len(moviesOf1950And1959)
Out[67]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [82]:
# Count rows per year, order the counts by year, and draw them as a line plot.
titles_per_year = titles['year'].value_counts().sort_index()
titles_per_year.plot()
Out[82]:
In [ ]: