In [8]:
%matplotlib inline
import pandas as pd
In [9]:
# HTML is imported from IPython's public display module.
from IPython.display import HTML

# Concatenate the two stylesheets. Each file is read inside a `with` block so
# its handle is closed immediately instead of lingering until garbage collection.
css_chunks = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_chunks.append(css_file.read())
css = ''.join(css_chunks)

# Last expression of the cell: renders the combined CSS as a <style> element.
HTML('<style>{}</style>'.format(css))
Out[9]:
In [10]:
# Load the titles table. `DataFrame.from_csv` has been removed from pandas;
# `pd.read_csv` is its replacement. `index_col=None` keeps the default integer
# index, as in the original call.
titles = pd.read_csv('data/titles.csv', index_col=None)
titles.head()
Out[10]:
In [11]:
# Load the cast table. `DataFrame.from_csv` has been removed from pandas;
# `pd.read_csv` is its replacement. `index_col=None` keeps the default integer
# index, as in the original call.
cast = pd.read_csv('data/cast.csv', index_col=None)
cast.head()
Out[11]:
In [ ]:
In [17]:
# Keep only the rows whose character is exactly 'Superman' or 'Batman'.
is_superman = cast.character == 'Superman'
is_batman = cast.character == 'Batman'

# Count rows per (year, character), pivot the characters into columns, and
# put 0 where a character has no rows for a given year.
c = (
    cast[is_superman | is_batman]
    .groupby(['year', 'character'])
    .size()
    .unstack()
    .fillna(0)
)
c.head()
Out[17]:
In [18]:
# Per-year difference between the Superman and Batman counts held in `c`.
d = c['Superman'] - c['Batman']

# Years where the difference is strictly positive, i.e. Superman rows
# outnumber Batman rows.
superman_ahead = d > 0.0
print('Superman years:')
print(len(d[superman_ahead]))
In [19]:
# Per-year difference between the Superman and Batman counts held in `c`.
d = c['Superman'] - c['Batman']

# Years where the difference is strictly negative, i.e. Batman rows
# outnumber Superman rows.
batman_ahead = d < 0.0
print('Batman years:')
print(len(d[batman_ahead]))
In [ ]:
In [23]:
# Count every cast row per (year, type) -- no character filter is applied --
# then pivot `type` into columns and put 0 where a type has no rows in a year.
c = (
    cast
    .groupby(['year', 'type'])
    .size()
    .unstack()
    .fillna(0)
)
c.plot()
Out[23]:
In [ ]:
In [24]:
# Area chart of the table currently held in `c` (accessor form of kind='area').
c.plot.area()
Out[24]:
In [ ]:
In [29]:
# Count cast rows per (year, type) and pivot `type` into columns.
# `unstack` leaves NaN where a type has no rows for a year; filling with 0
# keeps those years in the plot (NaN would drop them from the difference) and
# matches the fillna(0) used for the earlier per-year count table.
c = (
    cast
    .groupby(['year', 'type'])
    .size()
    .unstack('type')
    .fillna(0)
)

# Per-year difference between the actor and actress row counts.
(c.actor - c.actress).plot()
Out[29]:
In [ ]:
In [32]:
# Fraction of each year's actor + actress rows that are actor rows, plotted
# with the y-axis pinned to [0, 1].
actor_count = c['actor']
total_count = actor_count + c['actress']
(actor_count / total_count).plot(ylim=[0, 1])
Out[32]:
In [ ]:
In [40]:
# Restrict to cast rows whose `n` value is exactly 2.
c = cast[cast["n"] == 2]

# Count rows per (year, type) and pivot `type` into columns. `unstack` leaves
# NaN where a type has no rows for a year; filling with 0 makes the ratio
# below evaluate to 1.0 or 0.0 for such years instead of NaN. A year only
# appears if at least one type has rows, so the denominator is never 0.
c = c.groupby(['year', 'type']).size()
c = c.unstack('type').fillna(0)

# Actor share of the actor + actress rows per year, y-axis pinned to [0, 1].
(c.actor / (c.actor + c.actress)).plot(ylim=[0, 1])
Out[40]:
In [ ]:
In [62]:
# Restrict to cast rows whose `n` value is at most 3.
c = cast
c = c[c.n <= 3]

# Count rows per (year, type, n) and pivot `type` into columns. `unstack`
# leaves NaN where a type has no rows for a (year, n) pair; filling with 0
# makes the ratio below 1.0 or 0.0 for such pairs instead of NaN. A pair only
# appears if at least one type has rows, so the denominator is never 0.
c = c.groupby(['year', 'type', 'n']).size()
c = c.unstack('type').fillna(0)

# Actor share per (year, n), then one column per `n` value so each is drawn
# as its own line. (year, n) pairs with no rows at all remain NaN here.
r = c.actor / (c.actor + c.actress)
r = r.unstack('n')
r.plot(ylim=[0, 1])
Out[62]:
In [ ]: