In [2]:
%matplotlib inline
import pandas as pd
In [3]:
# Load the two local stylesheets and emit them as a single <style> element.
# `IPython.display` is the public import location for HTML; importing it from
# `IPython.core.display` is deprecated.
from IPython.display import HTML

# Read each stylesheet inside a `with` block so the file handle is closed
# deterministically instead of being left open until garbage collection.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Keep this as the cell's last expression so the notebook renders it.
HTML('<style>{}</style>'.format(css))
Out[3]:
In [4]:
# Load the titles table from CSV.
# `DataFrame.from_csv` was deprecated and later removed from pandas;
# `read_csv` is its replacement. from_csv's implicit date parsing only
# applied to the index column, and no index column is used here
# (index_col=None), so a plain read_csv call loads the same frame.
titles = pd.read_csv('data/titles.csv', index_col=None)

# Preview the first rows.
titles.head()
Out[4]:
In [5]:
# Load the cast table from CSV.
# `DataFrame.from_csv` was deprecated and later removed from pandas;
# `read_csv` is its replacement. from_csv's implicit date parsing only
# applied to the index column, and no index column is used here
# (index_col=None), so a plain read_csv call loads the same frame.
cast = pd.read_csv('data/cast.csv', index_col=None)

# Preview the first rows.
cast.head()
Out[5]:
In [6]:
# The ten most frequently occurring values in the `title` column
# (value_counts orders by descending count).
titles['title'].value_counts().iloc[:10]
Out[6]:
In [ ]:
In [7]:
# Three most common titles among rows with 1930 <= year < 1940.
in_1930s = (titles.year >= 1930) & (titles.year < 1940)
titles.loc[in_1930s, 'title'].value_counts().head(3)
Out[7]:
In [ ]:
In [8]:
# Bar chart of how many rows of `titles` fall in each decade
# (year floored to a multiple of 10), in chronological order.
t = titles
decade = t['year'] // 10 * 10
decade.value_counts().sort_index().plot(kind='bar')
Out[8]:
In [ ]:
In [9]:
# Same per-decade bar chart, restricted to rows whose title is exactly 'Hamlet'.
t = titles.loc[titles['title'] == 'Hamlet']
(
    t['year'].floordiv(10).mul(10)   # floor each year to its decade
     .value_counts()
     .sort_index()
     .plot(kind='bar')
)
Out[9]:
In [ ]:
In [10]:
# Per-decade bar chart of cast rows whose character is exactly 'Rustler'.
is_rustler = cast['character'] == 'Rustler'
t = cast[is_rustler]
rustler_decades = t['year'] // 10 * 10
rustler_decades.value_counts().sort_index().plot(kind='bar')
Out[10]:
In [ ]:
In [11]:
# Per-decade bar chart of cast rows whose character is exactly 'Hamlet'.
t = cast.loc[cast['character'] == 'Hamlet']
(
    t['year'].floordiv(10).mul(10)   # floor each year to its decade
     .value_counts()
     .sort_index()
     .plot(kind='bar')
)
Out[11]:
In [ ]:
In [12]:
# The eleven most frequent values of the `character` column.
cast['character'].value_counts().iloc[:11]
Out[12]:
In [ ]:
In [15]:
# Ten names appearing most often in rows where the character is "Herself".
cast.loc[cast['character'] == 'Herself', 'name'].value_counts().head(10)
Out[15]:
In [ ]:
In [16]:
# Ten names appearing most often in rows where the character is "Himself".
himself_rows = cast[cast['character'] == 'Himself']
himself_rows['name'].value_counts().head(10)
Out[16]:
In [ ]:
In [20]:
# The name with the most cast rows in 1945.
cast.loc[cast['year'] == 1945, 'name'].value_counts().head(1)
Out[20]:
In [ ]:
In [21]:
# The name with the most cast rows in 1985.
rows_1985 = cast[cast['year'] == 1985]
rows_1985['name'].value_counts().head(1)
Out[21]:
In [ ]:
In [32]:
# Line plot of the number of cast rows per year for the name 'Mammootty',
# ordered by year.
mammootty_years = cast.loc[cast['name'] == 'Mammootty', 'year']
mammootty_years.value_counts().sort_index().plot()
Out[32]:
In [ ]:
In [36]:
# Ten most frequent character names that start with 'Patron in'.
# na=False makes rows with a missing `character` count as non-matches;
# without it the mask would contain NaN for those rows and boolean
# indexing would raise. When no values are missing the result is unchanged.
is_patron_in = cast.character.str.startswith('Patron in', na=False)
cast[is_patron_in].character.value_counts().head(10)
Out[36]:
In [ ]:
In [38]:
# First ten character names (in table order) that start with "Science".
# na=False makes rows with a missing `character` count as non-matches;
# without it the mask would contain NaN for those rows and boolean
# indexing would raise. When no values are missing the result is unchanged.
# NOTE(review): unlike the 'Patron in' cell, this lists raw rows rather than
# value counts -- confirm that is the intended output.
is_science = cast.character.str.startswith("Science", na=False)
cast[is_science].character.head(10)
Out[38]:
In [ ]:
In [43]:
# Scatter plot of `n` against `year` for Judi Dench's cast rows.
# `DataFrame.sort` was removed from pandas; `sort_values` is the replacement
# for sorting by a column.
char = cast[cast.name == "Judi Dench"].sort_values("year")

# Keep only rows that actually have an `n` value.
char = char[char.n.notnull()]

char.plot(x="year", y="n", kind="scatter")
Out[43]:
In [ ]:
In [44]:
# Scatter plot of `n` against `year` for Cary Grant's cast rows.
# `DataFrame.sort` was removed from pandas; `sort_values` is the replacement
# for sorting by a column.
char = cast[cast.name == "Cary Grant"].sort_values("year")

# Keep only rows that actually have an `n` value.
char = char[char.n.notnull()]

char.plot(x="year", y="n", kind="scatter")
Out[44]:
In [ ]:
In [45]:
# Scatter plot of `n` against `year` for Sidney Poitier's cast rows,
# after discarding rows with no `n` value.
char = cast.loc[cast.name == "Sidney Poitier"]
has_n = char["n"].notnull()
char = char.loc[has_n]
char.plot(kind="scatter", x="year", y="n")
Out[45]:
In [ ]:
In [61]:
# Breakdown of the `type` column for cast rows from the 1950s
# (year // 10 == 195) whose `n` equals 1.
in_fifties = cast['year'] // 10 == 195
c = cast[in_fifties & (cast['n'] == 1)]
c['type'].value_counts()
Out[61]:
In [ ]:
In [60]:
# Breakdown of the `type` column for cast rows from the 1950s
# (year // 10 == 195) whose `n` equals 2.
c = (
    cast
    .loc[lambda rows: rows['year'] // 10 == 195]
    .loc[lambda rows: rows['n'] == 2]
)
c['type'].value_counts()
Out[60]:
In [ ]: