In [48]:
%matplotlib inline
import pandas as pd
import seaborn as sbn
sbn.set()
In [3]:
from IPython.core.display import HTML
# Read both stylesheets inside a `with` block so the file handles are closed
# deterministically instead of being left for the garbage collector.
with open('style-table.css') as table_css, open('style-notebook.css') as notebook_css:
    # Concatenation order matters for CSS precedence: table rules first, notebook rules second.
    css = table_css.read() + notebook_css.read()
# Last expression of the cell: IPython renders the returned HTML object, injecting the <style> tag.
HTML('<style>{}</style>'.format(css))
Out[3]:
In [4]:
# Load the titles table. `DataFrame.from_csv` was removed in pandas 1.0;
# `pd.read_csv` is its replacement and, like the former `index_col=None` call,
# keeps a default integer index and reads the first row as the header.
titles = pd.read_csv('data/titles.csv', index_col=None)
# Preview the first rows as the cell output.
titles.head()
Out[4]:
In [5]:
# Load the cast table. `DataFrame.from_csv` was removed in pandas 1.0;
# `pd.read_csv` is its replacement and, like the former `index_col=None` call,
# keeps a default integer index and reads the first row as the header.
cast = pd.read_csv('data/cast.csv', index_col=None)
# Preview the first rows as the cell output.
cast.head()
Out[5]:
In [ ]:
In [41]:
# Per-year counts of cast rows whose character is exactly 'Superman' or 'Batman'.
# After unstacking, each character is a column; year/character pairs with no rows become 0.
both = (
    cast[cast.character.isin(['Superman', 'Batman'])]
    .groupby(['year', 'character'])
    .size()
    .unstack()
    .fillna(0)
)
# Positive values mark years with strictly more Superman rows than Batman rows.
diff = both.Superman.sub(both.Batman)
# Report how many such years there are.
print("Superman: " + str(int((diff > 0).sum())))
In [ ]:
In [42]:
# Per-year counts of cast rows whose character is exactly 'Superman' or 'Batman'.
# After unstacking, each character is a column; year/character pairs with no rows become 0.
both = (
    cast[cast.character.isin(['Superman', 'Batman'])]
    .groupby(['year', 'character'])
    .size()
    .unstack()
    .fillna(0)
)
# Positive values mark years with strictly more Batman rows than Superman rows.
diff = both.Batman.sub(both.Superman)
# Report how many such years there are.
print("Batman: " + str(int((diff > 0).sum())))
In [ ]:
In [51]:
# Count cast rows per (year, type), pivot `type` into columns,
# and draw one line per value of the `type` column over the years.
(
    cast
    .groupby(['year', 'type'])
    .size()
    .unstack()
    .plot()
)
Out[51]:
In [ ]:
In [52]:
# Same per-(year, type) row counts as a line chart would use,
# drawn here as an area chart with one band per value of `type`.
(
    cast
    .groupby(['year', 'type'])
    .size()
    .unstack()
    .plot(kind='area')
)
Out[52]:
In [ ]:
In [55]:
# Table of cast-row counts indexed by year, with one column per value of `type`.
# Year/type combinations that have no rows are filled with 0 instead of NaN.
foo = (
    cast
    .groupby(['year', 'type'])
    .size()
    .unstack()
    .fillna(0)
)
In [60]:
# Yearly gap between the 'actor' and 'actress' counts (positive when 'actor' rows outnumber 'actress' rows).
foo['diff'] = foo['actor'].sub(foo['actress'])
# Plot the gap over the years; bracket access is required because `foo.diff` is a DataFrame method.
foo['diff'].plot()
Out[60]:
In [61]:
# Total rows per year across the 'actor' and 'actress' columns.
foo['totalRoles'] = foo['actor'].add(foo['actress'])
# Share of each year's rows that fall in the 'actor' column (a value between 0 and 1).
foo['manFrac'] = foo['actor'].div(foo['totalRoles'])
# Plot that share over the years.
foo['manFrac'].plot()
Out[61]:
In [ ]:
In [68]:
# Restrict to cast rows whose `n` column equals 2
# (presumably the second-billed role — confirm against the dataset).
support = cast.loc[cast.n == 2]
# Per-year row counts for that subset, one column per value of `type`; missing combinations become 0.
bar = (
    support
    .groupby(['year', 'type'])
    .size()
    .unstack()
    .fillna(0)
)
# Total rows per year and the share of them in the 'actor' column.
bar['totalRoles'] = bar['actor'].add(bar['actress'])
bar['manFrac'] = bar['actor'].div(bar['totalRoles'])
# Plot the 'actor' share over the years for this subset.
bar['manFrac'].plot()
Out[68]:
In [ ]:
In [84]:
# Restrict to cast rows whose `n` column equals 3
# (presumably the third-billed role — confirm against the dataset).
thirdWheel = cast[cast.n==3]
# Per-year row counts for that subset, one column per value of `type`; missing combinations become 0.
baz = thirdWheel.groupby(['year','type']).size().unstack().fillna(0)
# Total rows per year and the share of them in the 'actor' column.
baz['totalRoles'] = baz['actor']+baz['actress']
baz['manFrac'] = baz['actor']/baz['totalRoles']
# Overlay the three 'actor'-share series on one shared Axes for comparison.
# Each `.plot()` call returns a matplotlib Axes, and Axes objects cannot be combined
# with `+` (that raises TypeError), so the first Axes is passed to the later calls via `ax=`.
ax = foo['manFrac'].plot(label='all roles', legend=True)
bar['manFrac'].plot(ax=ax, label='n == 2', legend=True)
baz['manFrac'].plot(ax=ax, label='n == 3', legend=True)
In [ ]: