In [136]:
import pandas as pd
%xmode plain
In [164]:
from IPython.display import HTML

# Inject the table stylesheet into the notebook as an inline <style> element.
# The file is read inside a `with` block so the handle is closed right away
# instead of being left for the garbage collector.
with open('style-table.css') as css_file:
    table_css = css_file.read()
HTML('<style>{}</style>'.format(table_css))
Out[164]:
In [57]:
# Load the sample data and lower-case every column label in one expression.
# rename(columns=str.lower) returns a new frame, so re-running the cell is
# idempotent and no iterator is assigned to `.columns` (assigning a bare
# `map` object is rejected by some pandas versions under Python 3).
df1 = pd.read_csv('sample.csv').rename(columns=str.lower)
df1
Out[57]:
In [4]:
# Inspect the dtype pandas inferred for each column of the loaded frame.
df1.dtypes
Out[4]:
In [5]:
# A bare GroupBy object: nothing is aggregated yet, so only its repr is shown.
df1.groupby('director')
Out[5]:
In [6]:
# GroupBy.head() keeps the first 5 rows of every group in their original row
# order; presumably no director in sample.csv has more than 5 rows, which
# would explain why the result looks like df1 — confirm against the data.
df1.groupby('director').head() # looks the same!
Out[6]:
In [7]:
# Verify programmatically whether groupby(...).head() reproduces df1 exactly.
df1.groupby('director').head().equals(df1)
Out[7]:
In [8]:
# Re-label the rows with the director values. DataFrame.reindex_axis was
# deprecated in pandas 0.21 and removed in 1.0; reindex() with the labels as
# first argument is the equivalent row-axis call. Because reindex *looks up*
# the new labels in the existing index rather than assigning them, labels
# that do not exist in df1's current index produce all-NaN rows.
df1.reindex(df1.director)
Out[8]:
In [9]:
# Materialise the grouping: iterating a GroupBy yields (group key, sub-frame) pairs.
list(df1.groupby('director'))
Out[9]:
In [10]:
# Pull out the sub-frame belonging to a single group key.
df1.groupby('director').get_group('Alfred Hitchcock')
Out[10]:
In [11]:
# First entry of each column within every director group; the group key
# becomes the index of the result.
df1.groupby('director').first()
Out[11]:
In [12]:
# Last entry of each column within every director group (counterpart of first()).
df1.groupby('director').last()
Out[12]:
In [13]:
# Column-wise maximum per director. Each column is reduced independently, so
# a result row need not correspond to any single original row.
df1.groupby('director').max()
Out[13]:
In [14]:
# Select one director's row from the per-group maxima. The index of the
# aggregated frame holds the director labels, so label-based .loc is the
# right accessor; the old .ix accessor was removed in pandas 1.0.
df1.groupby('director').max().loc['Alfred Hitchcock']
Out[14]:
In [161]:
# Group by director *and* the existing row index. The result is indexed by a
# two-level (director, original row index) MultiIndex with the remaining
# columns as data.
df2 = df1.groupby(['director', df1.index]).first()
df2
Out[161]:
In [52]:
# Move the 'director' index level back into a regular column, leaving the
# original row index as the only index level.
# NOTE(review): the name df3 is rebound to a different frame in a later cell
# (groupby on director/title) — results depend on which cell ran last.
df3 = df2.reset_index('director')
df3
Out[52]:
In [60]:
# Put the columns in title/year/director order (presumably df1's original
# column order, so the frame can be compared with df1 — confirm).
# NOTE(review): the name df4 is rebound to a different frame in a later cell.
df4 = df3[['title', 'year', 'director']]
df4
Out[60]:
In [67]:
# Sort rows by the index to line the frame up with df1 for comparison.
df4.sort_index()
Out[67]:
In [76]:
# Show the original frame again for a visual comparison with the sorted df4.
df1
Out[76]:
In [73]:
# dtypes of the original frame, to compare with those of the rebuilt frame.
df1.dtypes
Out[73]:
In [74]:
# dtypes of the rebuilt frame; DataFrame.equals requires matching column dtypes.
df4.sort_index().dtypes
Out[74]:
In [77]:
# Element-wise comparison of the two indexes (yields a boolean array).
df1.index == df4.sort_index().index
Out[77]:
In [78]:
# Three views of the round trip: the original frame, the cell-by-cell
# comparison with the rebuilt frame, and the single-boolean equals() verdict.
# NOTE(review): `==` between DataFrames requires identically labelled frames;
# confirm df1 and df4 share the same column order.
print(df1)
print(df1 == df4.sort_index())
print(df1.equals(df4.sort_index()))
In [82]:
# Reset index level 1 (the original row index) into a column, leaving
# 'director' as the only, non-unique, index level.
df2.reset_index(1)
Out[82]:
In [90]:
# Index by director only (level 1 moved into a column), then look up two
# directors by label. .loc replaces the .ix accessor removed in pandas 1.0;
# the index here holds string labels, so the lookup is label-based either way.
x = df2.reset_index(1)
print(x.loc['George Cukor'])
print(x.loc['Alfred Hitchcock'])
print(x.index)
In [101]:
# Use the director column as the row index; labels repeat wherever a director
# appears on more than one row.
y = df1.set_index('director')
y
Out[101]:
In [107]:
# Label lookup on the director index. The .ix accessor was removed in
# pandas 1.0; .loc is the label-based replacement.
y.loc['George Cukor']
Out[107]:
In [108]:
# Label lookup on the director index. The .ix accessor was removed in
# pandas 1.0; .loc is the label-based replacement.
y.loc['Alfred Hitchcock']
Out[108]:
In [112]:
# Label-based selection of the row(s) whose index label is 'George Cukor'.
y.loc['George Cukor']
Out[112]:
In [113]:
# Label-based selection of the row(s) whose index label is 'Alfred Hitchcock'.
y.loc['Alfred Hitchcock']
Out[113]:
In [117]:
# .at addresses a single (row label, column label) position.
# NOTE(review): the director index may contain this label more than once, in
# which case the result is presumably not a scalar — confirm against the output.
y.at['Alfred Hitchcock','title']
Out[117]:
In [118]:
# Same .at lookup for a second director, for comparison with the previous cell.
y.at['George Cukor','title']
Out[118]:
In [145]:
# NOTE(review): `p` is not defined anywhere in the visible notebook —
# presumably a print/display helper from a deleted cell or an earlier kernel
# session. On a fresh kernel this line raises NameError; confirm and restore
# the definition or replace the call.
p(df1.set_index('director'))
In [125]:
# Build a (director, title) MultiIndex in two steps (append=True adds 'title'
# as a second level instead of replacing 'director'), then select one
# director by first-level label.
df1.set_index('director').set_index('title', append=True).loc['Alfred Hitchcock']
Out[125]:
In [128]:
# Build the same (director, title) MultiIndex in a single set_index call.
df1.set_index(['director', 'title'])
Out[128]:
In [137]:
# Label slice over the first level of a (director, title) MultiIndex that has
# not been sorted.
# NOTE(review): no output was recorded for this cell; presumably it raised an
# unsorted-index error, since the next cell retries the same slice after
# sort_index() — confirm, and catch/display the error if it is the point.
df1.set_index(['director', 'title']).loc['Alfred Hitchcock':'George Cukor']
In [138]:
# Same label slice, but on a sorted MultiIndex; label slices are inclusive of
# both endpoints.
df1.set_index(['director', 'title']).sort_index().loc['Alfred Hitchcock':'George Cukor']
Out[138]:
In [ ]:
# The full frame indexed by (director, title) and sorted by that index.
df1.set_index(['director', 'title']).sort_index()
In [154]:
# Group on both columns; the result is indexed by a (director, title)
# MultiIndex with the group keys in sorted order (groupby's default).
# NOTE(review): the following cell repeats this expression verbatim.
df1.groupby(['director', 'title']).first()
Out[154]:
In [134]:
# NOTE(review): verbatim duplicate of the preceding cell; one of the two can
# presumably be deleted.
df1.groupby(['director', 'title']).first()
Out[134]:
In [184]:
#
# Official diagram generation
#
def _write_html(path, frame):
    """Render ``frame`` as an HTML table and write it to ``path``.

    The file is opened in a ``with`` block so it is flushed and closed as soon
    as the table is written, rather than whenever the handle is collected.
    """
    with open(path, 'w') as html_file:
        html_file.write(frame.to_html())


# Shared intermediates for the variants below.
by_director = df1.set_index('director')
by_director_title = by_director.set_index('title', append=True)
grouped_first = df1.groupby(['director', 'title']).first()

# File names presumably encode <u|s = unsorted|sorted>_<index columns>_<data
# columns> using the initials of title/year/director — confirm.
_write_html('u__tyd.html', df1)
_write_html('u_d_ty.html', by_director)
_write_html('u_dt_y.html', by_director_title)
_write_html('s_d_ty.html', by_director.sort_index())
_write_html('s_dt_y.html', by_director_title.sort_index())

# Is two-step indexing the same as indexing on both columns at once?
print(by_director_title.equals(df1.set_index(['director', 'title'])))
# Is a sorted (director, title) index the same as groupby(...).first()?
print(df1.set_index(['director', 'title']).sort_index().equals(grouped_first))
# Same question when only the director level is sorted before adding title.
print(by_director.sort_index().set_index('title', append=True).equals(grouped_first))
In [182]:
# Without append=True the second set_index *replaces* the director index with
# 'title' instead of adding a level, so the director values are dropped.
df1.set_index('director').sort_index().set_index('title')
Out[182]:
In [179]:
Out[179]:
In [ ]:
In [ ]:
In [ ]:
In [127]:
# Select by first-level label only; the result is indexed by the remaining
# 'title' level.
df1.set_index(['director', 'title']).loc['Alfred Hitchcock']
Out[127]:
In [129]:
# Move only the 'title' level back into a column; 'director' stays as the index.
df1.set_index(['director', 'title']).reset_index('title')
Out[129]:
In [130]:
# Move both index levels back into columns, leaving a default integer index.
df1.set_index(['director', 'title']).reset_index(['director', 'title'])
Out[130]:
In [131]:
# Original frame, shown for comparison with the set_index/reset_index round trip.
df1
Out[131]:
In [ ]:
In [ ]:
In [35]:
# Pivot index level 0 (director) into the columns, leaving the original row
# index as the only row level; missing combinations are filled with NaN.
df2.unstack(level=0)
Out[35]:
In [36]:
# Swap rows and columns: the (director, row index) MultiIndex becomes the
# column index.
df2.transpose()
Out[36]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [17]:
# Select every row under one first-level (director) label of the MultiIndex.
# .loc replaces the .ix accessor removed in pandas 1.0.
df2.loc['Alfred Hitchcock']
Out[17]:
In [19]:
# Select a single row by its full (director, original row index) key. The
# removed .ix accessor is replaced by .loc; the key is written as an explicit
# tuple with `:` for the columns so it cannot be misread as (row, column).
# Assumes original row 3 is an Alfred Hitchcock row, as in the recorded run.
df2.loc[('Alfred Hitchcock', 3), :]
Out[19]:
In [20]:
# Earliest and latest year per director, with explicit output column names.
# The reducers are given by name ('min'/'max') rather than as the Python
# builtins: pandas has always mapped the builtins to these groupby methods,
# and since 2.1 it warns that the implicit mapping will go away.
df1.groupby('director')['year'].agg([('first_year', 'min'), ('last_year', 'max')])
Out[20]:
In [21]:
# Different reducer per column: smallest title and largest year per director.
# Reducers are named as strings instead of passing the builtins min/max,
# whose implicit mapping to the groupby methods is deprecated (pandas >= 2.1).
df1.groupby('director').agg({'title': 'min', 'year': 'max'})
Out[21]:
In [24]:
# Frame indexed by (director, title), with the remaining columns as data.
# NOTE(review): df3 is also assigned in an earlier cell (df2.reset_index) with
# a different meaning; the execution counts show the cells were run out of
# file order, so which value df3 holds depends on run order.
df3 = df1.groupby(['director', 'title']).first()
df3
Out[24]:
In [25]:
# Frame indexed by (director, year), with the remaining columns as data.
# NOTE(review): df4 is also assigned in an earlier cell (column selection from
# df3) with a different meaning; which value it holds depends on run order.
df4 = df1.groupby(['director', 'year']).first()
df4
Out[25]:
In [26]:
# Group on three keys at once; all three become index levels of the result.
df1.groupby(['director', 'title', 'year']).first()
Out[26]:
In [27]:
# stack() pivots the column labels into an inner index level, producing a
# long-format Series indexed by (row index, column name).
s1 = df1.stack()
s1
Out[27]:
In [28]:
# unstack() on a frame with a single-level index also yields a Series, but
# with the levels the other way round: (column name, row index).
s2 = df1.unstack()
s2
Out[28]:
In [29]:
# Stack the columns of df3 into an additional innermost index level.
# NOTE(review): df3 is assigned in two different cells of this notebook; the
# result depends on which assignment ran last.
df3.stack()
Out[29]:
In [30]:
# Pivot the innermost index level of df3 into the columns.
# NOTE(review): df3 is assigned in two different cells of this notebook; the
# result depends on which assignment ran last.
df3.unstack()
Out[30]:
In [ ]: