In [4]:
# Render matplotlib figures inline in the notebook output area.
%matplotlib inline
import pandas as pd
import seaborn as sbn
# Switch matplotlib to seaborn's default styling for every plot drawn below.
sbn.set()
In [5]:
from IPython.core.display import HTML
# Read both stylesheets through context managers so each file handle is
# closed as soon as its contents are read, instead of being left open until
# the garbage collector gets to it.
css_chunks = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_chunks.append(css_file.read())

# Table styles first, notebook styles second -- same order as concatenating
# the two file contents directly.
css = ''.join(css_chunks)

# Last expression of the cell: the HTML object wrapping the combined CSS in a
# <style> element, which the notebook renders as the cell output.
HTML('<style>{}</style>'.format(css))
Out[5]:
In [6]:
# Load the film titles table.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0, so
# use `read_csv`, the supported reader. With `index_col=None` no column is
# used as the index, so `from_csv`'s implicit index date-parsing had nothing
# to act on and the resulting frame is the same.
titles = pd.read_csv('data/titles.csv', index_col=None)

# Preview the first rows to check that the load worked.
titles.head()
Out[6]:
In [7]:
# Load the cast/credits table.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0, so
# use `read_csv`, the supported reader. With `index_col=None` no column is
# used as the index, so `from_csv`'s implicit index date-parsing had nothing
# to act on and the resulting frame is the same.
cast = pd.read_csv('data/cast.csv', index_col=None)

# Preview the first rows to check that the load worked.
cast.head()
Out[7]:
In [14]:
# The ten titles that occur most often in the titles table.
(
    titles['title']
    .value_counts()
    .head(10)
)
Out[14]:
In [ ]:
In [17]:
# Keep rows whose year floor-divides to 193 (i.e. 1930-1939), then show the
# three years with the most rows.
(
    titles.loc[(titles['year'] // 10).eq(193), 'year']
    .value_counts()
    .head(3)
)
Out[17]:
In [ ]:
In [40]:
# Store each row's decade (year rounded down to a multiple of 10) as a column.
titles['decade'] = titles['year'].floordiv(10).mul(10)

# Bar chart of row counts per decade, ordered by decade.
(
    titles['decade']
    .value_counts()
    .sort_index()
    .plot(kind='bar')
)
Out[40]:
In [41]:
# Bar chart of row counts per individual year, ordered by year.
(
    titles['year']
    .value_counts()
    .sort_index()
    .plot(kind='bar')
)
Out[41]:
In [42]:
# (Re)compute the decade column so this cell works on its own.
titles['decade'] = titles['year'].floordiv(10).mul(10)

# Bar chart of how many rows titled exactly 'Hamlet' fall in each decade.
(
    titles.loc[titles['title'].eq('Hamlet'), 'decade']
    .value_counts()
    .sort_index()
    .plot(kind='bar')
)
Out[42]:
In [ ]:
In [54]:
# Store each credit's decade (year rounded down to a multiple of 10).
cast['decade'] = cast['year'].floordiv(10).mul(10)

# Bar chart of credits whose character is exactly 'Rustler', per decade.
(
    cast.loc[cast['character'].eq('Rustler'), 'decade']
    .value_counts()
    .sort_index()
    .plot(kind='bar')
)
Out[54]:
In [ ]:
In [45]:
# (Re)compute the decade column so this cell works on its own.
cast['decade'] = cast['year'].floordiv(10).mul(10)

# Bar chart of credits whose character is exactly 'Hamlet', per decade.
(
    cast.loc[cast['character'].eq('Hamlet'), 'decade']
    .value_counts()
    .sort_index()
    .plot(kind='bar')
)
Out[45]:
In [ ]:
In [12]:
# The eleven character names that appear most often in the cast table.
(
    cast['character']
    .value_counts()
    .head(11)
)
Out[12]:
In [ ]:
In [18]:
# The ten names credited most often with the character 'Herself'.
(
    cast.loc[cast['character'].eq('Herself'), 'name']
    .value_counts()
    .head(10)
)
Out[18]:
In [ ]:
In [19]:
# The ten names credited most often with the character 'Himself'.
(
    cast.loc[cast['character'].eq('Himself'), 'name']
    .value_counts()
    .head(10)
)
Out[19]:
In [ ]:
In [20]:
# The ten names with the most credits in 1945.
(
    cast.loc[cast['year'].eq(1945), 'name']
    .value_counts()
    .head(10)
)
Out[20]:
In [ ]:
In [21]:
# The ten names with the most credits in 1985.
(
    cast.loc[cast['year'].eq(1985), 'name']
    .value_counts()
    .head(10)
)
Out[21]:
In [ ]:
In [22]:
# Bar chart of the number of credits per year for the name 'Ron Jeremy'.
(
    cast.loc[cast['name'].eq('Ron Jeremy'), 'year']
    .value_counts()
    .sort_index()
    .plot(kind='bar')
)
Out[22]:
In [ ]:
In [59]:
# Credits whose character name begins with 'Patron in'.
# `na=False` makes rows with a missing character count as non-matches;
# without it `str.startswith` yields NaN for them and the boolean mask
# raises instead of filtering. Results are unchanged when nothing is missing.
c = cast[cast.character.str.startswith('Patron in', na=False)]

# The ten most frequent of those character names.
c.character.value_counts().head(10)
Out[59]:
In [ ]:
In [60]:
# Credits whose character name begins with 'Science'.
# `na=False` makes rows with a missing character count as non-matches;
# without it `str.startswith` yields NaN for them and the boolean mask
# raises instead of filtering. Results are unchanged when nothing is missing.
c = cast[cast.character.str.startswith('Science', na=False)]

# The ten most frequent of those character names.
c.character.value_counts().head(10)
Out[60]:
In [ ]:
In [67]:
# All credits for the name "Judi Dench".
c = cast.loc[cast['name'].eq("Judi Dench")]

# Scatter of the `n` column against `year`; semi-transparent markers so that
# overlapping points remain visible.
c.plot(x='year', y='n', kind="scatter", alpha=0.5)
Out[67]:
In [ ]:
In [69]:
# All credits for the name "Cary Grant".
d = cast.loc[cast['name'].eq("Cary Grant")]

# Scatter of the `n` column against `year`; semi-transparent markers so that
# overlapping points remain visible.
d.plot(x='year', y='n', kind="scatter", alpha=0.5)
Out[69]:
In [ ]:
In [25]:
# Scatter of the `n` column against `year` for the name 'Ron Jeremy'.
(
    cast.loc[cast['name'].eq('Ron Jeremy')]
    .plot(x='year', y='n', kind='scatter')
)
Out[25]:
In [ ]:
In [28]:
# Among 1950s credits (year // 10 == 195) with n == 1, count rows per `type`.
(
    cast.loc[(cast['year'] // 10).eq(195) & cast['n'].eq(1), 'type']
    .value_counts()
)
Out[28]:
In [ ]:
In [29]:
# Among 1950s credits (year // 10 == 195) with n == 2, count rows per `type`.
(
    cast.loc[(cast['year'] // 10).eq(195) & cast['n'].eq(2), 'type']
    .value_counts()
)
Out[29]:
In [ ]: