In [1]:
%matplotlib inline
import pandas as pd
In [2]:
# Inject the notebook's custom stylesheets into the page.
# HTML is imported from the public `IPython.display` module, and each CSS
# file is read inside a `with` block so its handle is closed right away.
from IPython.display import HTML

css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Last expression of the cell: rendered by the notebook as a <style> element.
HTML('<style>{}</style>'.format(css))
Out[2]:
In [7]:
# Load data/titles.csv into `titles` with a default integer index.
# `pd.DataFrame.from_csv` was deprecated and later removed from pandas;
# `pd.read_csv` is its replacement and accepts the same arguments used here.
titles = pd.read_csv('data/titles.csv', index_col=None, encoding='utf-8')

# Preview the first rows only instead of dumping the whole frame.
titles.head()
Out[7]:
In [7]:
# IPython help lookup (trailing `?`) for `chardet.detect`; it only shows
# documentation and computes nothing.
# NOTE(review): in the visible notebook `chardet` is imported only in later
# cells — confirm it is in scope when this cell runs on a fresh kernel.
chardet.detect?
In [8]:
# Show the Python type of `rawdata`.
# NOTE(review): in the visible notebook `rawdata` is assigned only in a later
# cell (the one that reads 'data/cast.csv'), so a top-to-bottom run on a fresh
# kernel would hit a NameError here — confirm the intended cell order.
type(rawdata)
Out[8]:
In [16]:
# List every file in the current working directory whose name matches *.csv.
import glob

csv_paths = glob.glob('*.csv')
csv_paths
Out[16]:
In [19]:
import glob
from chardet.universaldetector import UniversalDetector

# Guess the text encoding of each *.csv file in the current working directory
# by streaming its lines through one reusable chardet detector.
detector = UniversalDetector()
for filename in glob.glob('*.csv'):
    # Clear whatever the detector learned from the previous file.
    detector.reset()
    # Binary mode: the detector is fed the raw lines of the file. `open` in a
    # `with` block replaces the Python 2-only `file()` builtin and guarantees
    # the handle is closed.
    with open(filename, 'rb') as csv_file:
        for line in csv_file:
            detector.feed(line)
            # Stop reading as soon as the detector reports it is done.
            if detector.done:
                break
    # Finalise the guess before reading `detector.result`.
    detector.close()
    # One line per file: padded name, a space, then the result. A single
    # print call gives this layout on both Python 2 and Python 3.
    print('{} {}'.format(filename.ljust(60), detector.result))
In [ ]:
In [22]:
import chardet
from io import StringIO  # not needed by this cell any more; import kept in case other cells rely on it

# Read the raw bytes of data/cast.csv and ask chardet to guess the encoding.
# `chardet.detect` works on a byte string, so the file is opened in binary
# mode and the bytes are passed directly instead of a text-mode string
# wrapped in StringIO. The `with` block closes the file handle.
with open('data/cast.csv', 'rb') as cast_file:
    rawdata = cast_file.read()

chardet.detect(rawdata)
In [10]:
# Load data/cast.csv into `cast` with a default integer index.
# `pd.DataFrame.from_csv` was deprecated and later removed from pandas;
# `pd.read_csv` is its replacement.
cast = pd.read_csv('data/cast.csv', index_col=None)

# Preview the first rows only instead of dumping the whole frame.
cast.head()
Out[10]:
In [7]:
# Ten most frequent values of the `title` column, most common first.
title_counts = titles['title'].value_counts()
title_counts.head(10)
Out[7]:
In [ ]:
In [8]:
# Row count per year for the rows with 1930 <= year < 1940.
years = titles['year']
in_1930s = (years >= 1930) & (years < 1940)
years[in_1930s].value_counts()
Out[8]:
In [ ]:
In [29]:
# Round every title's year down to the start of its decade (1987 -> 1980).
dec = (titles['year'] // 10) * 10
print(dec.max())
print(dec.min())

# One histogram bin per decade between the earliest and the latest one.
# The bin count has to be an integer: floor division plus int() avoids the
# float that `/` produces under Python 3.
decade_bins = int((dec.max() - dec.min()) // 10 + 1)
dec.hist(bins=decade_bins)
Out[29]:
In [35]:
# Histogram, by decade, of the rows in `titles` whose title is exactly "Hamlet".
# `.copy()` makes the selection an independent frame, so overwriting its
# `year` column neither touches `titles` nor triggers SettingWithCopyWarning.
hamdec = titles[titles['title'] == "Hamlet"].copy()

# Replace each year with the first year of its decade.
hamdec['year'] = (hamdec['year'] // 10) * 10

# One bin per decade spanned; the bin count must be an integer, so use floor
# division and int() rather than `/`, which yields a float on Python 3.
decade_bins = int((hamdec['year'].max() - hamdec['year'].min()) // 10 + 1)
hamdec['year'].hist(bins=decade_bins)
Out[35]:
In [34]:
# Display the current contents of `hamdec`.
# NOTE(review): several cells in this notebook reassign `hamdec` (from `titles`
# and from `cast`), so what is shown depends on which of them ran last.
hamdec
Out[34]:
In [36]:
# Histogram, by decade, of the rows in `cast` whose character is exactly "Rustler".
# The variable is still called `hamdec` because other cells read that name.
# `.copy()` makes the selection an independent frame, so overwriting its
# `year` column neither touches `cast` nor triggers SettingWithCopyWarning.
hamdec = cast[cast['character'] == "Rustler"].copy()

# Replace each year with the first year of its decade.
hamdec['year'] = (hamdec['year'] // 10) * 10

# One bin per decade spanned; the bin count must be an integer, so use floor
# division and int() rather than `/`, which yields a float on Python 3.
decade_bins = int((hamdec['year'].max() - hamdec['year'].min()) // 10 + 1)
hamdec['year'].hist(bins=decade_bins)
Out[36]:
In [ ]:
In [37]:
# Histogram, by decade, of the rows in `cast` whose character is exactly "Hamlet".
# `.copy()` makes the selection an independent frame, so overwriting its
# `year` column neither touches `cast` nor triggers SettingWithCopyWarning.
hamdec = cast[cast['character'] == "Hamlet"].copy()

# Replace each year with the first year of its decade.
hamdec['year'] = (hamdec['year'] // 10) * 10

# One bin per decade spanned; the bin count must be an integer, so use floor
# division and int() rather than `/`, which yields a float on Python 3.
decade_bins = int((hamdec['year'].max() - hamdec['year'].min()) // 10 + 1)
hamdec['year'].hist(bins=decade_bins)
Out[37]:
In [ ]:
In [39]:
# Eleven most frequent values of the `character` column, most common first.
character_counts = cast['character'].value_counts()
character_counts.head(11)
Out[39]:
In [ ]:
In [9]:
# Ten names that appear most often on rows whose character is "Herself".
is_herself = cast['character'] == 'Herself'
cast.loc[is_herself, 'name'].value_counts().head(10)
Out[9]:
In [ ]:
In [42]:
# Ten names that appear most often on rows whose character is "Himself".
is_himself = cast['character'] == 'Himself'
cast.loc[is_himself, 'name'].value_counts().head(10)
Out[42]:
In [ ]:
In [43]:
# Ten names with the most rows in `cast` for the year 1945.
names_1945 = cast.loc[cast['year'] == 1945, 'name']
names_1945.value_counts().head(10)
Out[43]:
In [ ]:
In [44]:
# Ten names with the most rows in `cast` for the year 1985.
names_1985 = cast.loc[cast['year'] == 1985, 'name']
names_1985.value_counts().head(10)
Out[44]:
In [ ]:
In [49]:
# Histogram of the `year` column for the rows whose name is "Mammootty".
mammootty_rows = cast[cast['name'] == 'Mammootty']
mammootty_rows.hist(column='year')
Out[49]:
In [46]:
# IPython help lookup (trailing `?`) for `cast.hist`; it only shows
# documentation and produces no analysis result.
cast.hist?
In [ ]:
In [52]:
# Ten most frequent character values that begin with "Patron in".
starts_with_patron_in = cast['character'].str.startswith('Patron in')
cast.loc[starts_with_patron_in, 'character'].value_counts().head(10)
Out[52]:
In [54]:
# Ten most frequent character values that begin with "Science".
starts_with_science = cast['character'].str.startswith('Science')
cast.loc[starts_with_science, 'character'].value_counts().head(10)
Out[54]:
In [ ]:
In [55]:
# Scatter plot of `n` against `year` for the rows whose name is "Judi Dench".
judi_dench_rows = cast.loc[cast['name'] == 'Judi Dench']
judi_dench_rows.plot(x='year', y='n', kind='scatter')
Out[55]:
In [ ]:
In [ ]:
In [ ]:
In [56]:
# Scatter plot of `n` against `year` for the rows whose name is "Sidney Poitier".
sidney_poitier_rows = cast.loc[cast['name'] == 'Sidney Poitier']
sidney_poitier_rows.plot(x='year', y='n', kind='scatter')
Out[56]:
In [ ]:
In [60]:
# Number of rows with n == 1, type 'actor' and 1950 <= year < 1960.
is_n1_actor_1950s = (
    (cast['n'] == 1)
    & (cast['type'] == 'actor')
    & (cast['year'] >= 1950)
    & (cast['year'] < 1960)
)
len(cast[is_n1_actor_1950s])
Out[60]:
In [61]:
# Number of rows with n == 1, type 'actress' and 1950 <= year < 1960.
is_n1_actress_1950s = (
    (cast['n'] == 1)
    & (cast['type'] == 'actress')
    & (cast['year'] >= 1950)
    & (cast['year'] < 1960)
)
len(cast[is_n1_actress_1950s])
Out[61]:
In [63]:
# Number of rows with n == 2, type 'actor' and 1950 <= year < 1960.
is_n2_actor_1950s = (
    (cast['n'] == 2)
    & (cast['type'] == 'actor')
    & (cast['year'] >= 1950)
    & (cast['year'] < 1960)
)
len(cast[is_n2_actor_1950s])
Out[63]:
In [62]:
# Number of rows with n == 2, type 'actress' and 1950 <= year < 1960.
is_n2_actress_1950s = (
    (cast['n'] == 2)
    & (cast['type'] == 'actress')
    & (cast['year'] >= 1950)
    & (cast['year'] < 1960)
)
len(cast[is_n2_actress_1950s])
Out[62]:
In [ ]: