In [1]:
import numpy as np
import pandas as pd
In [2]:
from numpy.random import randn
In [20]:
np.random.seed(101) # fix the RNG seed so the "random" numbers are reproducible and match the ones in the video
In [21]:
# Build a 5x4 frame of randn draws: rows labelled a-e, columns labelled w-z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('abcde'),
    columns=list('wxyz'),
)
In [22]:
df # nuhhh
Out[22]:
In [23]:
df['w'] # each column is a Series tho
Out[23]:
In [24]:
type(df['w']) # o_0
Out[24]:
In [25]:
type(df)
Out[25]:
In [26]:
df.w # attribute-style column access; only works when the name is a valid identifier that does not clash with a DataFrame attribute, so df['w'] is the safer form
Out[26]:
In [27]:
df[['w','z']] # a list of column names selects several columns and returns a DataFrame (not a Series)
Out[27]:
Adding a new column
In [41]:
df['new'] = df['w'] + df['y'] # element-wise sum of two columns; assigning to a new key adds the column to df in place
In [42]:
df # oh dang we just made something!
Out[42]:
Dropping a column (returns a new DataFrame)
In [43]:
df.drop('new', axis=1) # axis must be passed by keyword (positional axis was removed in pandas 2.0); axis=1 means columns; returns a new frame and leaves df untouched
Out[43]:
In [44]:
df # but wait! we didn't change the original
Out[44]:
To modify the original, use the inplace=True option
In [45]:
df.drop('new',axis=1,inplace=True) # mutates df itself and returns None (hence no Out[]); re-running this cell raises KeyError because 'new' is already gone
In [46]:
df
Out[46]:
In [48]:
df.drop('e') # dropping a row: axis defaults to 0 (index labels); returns a new frame, df is unchanged
Out[48]:
In [49]:
df.shape # (rows, columns); still 5 rows because the row drop above was not in place
Out[49]:
In [51]:
df.loc['a'] # rows are also series
Out[51]:
In [54]:
df.iloc[2] # position-based lookup: the third row (label 'c'), returned as a Series
Out[54]:
In [55]:
df.loc['b','y'] # single cell: row label first, then column label
Out[55]:
In [56]:
df.loc['a':'b','x':'y'] # matrix-style sub-block; label slices include BOTH endpoints, unlike positional slices
Out[56]:
In [58]:
df.loc[['a','b'],['x','y']] # individual row-column selection
Out[58]:
In [60]:
booldf = df > 0 # element-wise comparison: a frame of the same shape holding True/False
booldf
Out[60]:
In [63]:
df[booldf] # keeps values where the mask is True and fills the rest with NaN; same as `df[df>0]`
Out[63]:
In [64]:
df['w']>0
Out[64]:
In [66]:
df[df['w']>0] # a boolean Series filters ROWS: only rows where w > 0 are kept, no NaN filling
Out[66]:
In [67]:
df[df['z']<0]
Out[67]:
In [71]:
# Two steps: filter the rows first, then pick a column from the filtered frame.
w_is_positive = df['w'] > 0
resultdf = df[w_is_positive]
resultdf['x']
Out[71]:
In [74]:
df[df['w']>0]['x'] # same thing in one line; for several columns pass a list: [['x','y']]; for a column range use .loc[:, 'x':'z'] (a bare ['x':'z'] would slice rows)
Out[74]:
In [76]:
df[(df['w']>0) & (df['y']>1)] # element-wise AND is a single `&` with each comparison parenthesised; Python's `and` raises ValueError on a Series
Out[76]:
In [77]:
df[(df['w']>0) | (df['y']>1)] # element-wise OR is the pipe `|`, again with parenthesised comparisons; Python's `or` does not work on a Series
Out[77]:
In [79]:
df.reset_index() # old row labels move into a column named 'index' and rows are renumbered from 0; returns a new frame unless `inplace=True` is given
Out[79]:
In [83]:
newind = 'CO ME NH MT NY'.split() # str.split() on whitespace: a quick way to type out a list of five labels
In [84]:
df['States'] = newind # adds a column to df in place; the list length must equal the number of rows
In [85]:
df.set_index('States') # returns a new frame indexed by the States column; df itself keeps its a-e index
Out[85]:
In [91]:
# Index Levels
# Pair each outer group label with its inner number, then build a
# two-level MultiIndex from the resulting (group, number) tuples.
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
level_pairs = list(zip(outside, inside))
hier_index = pd.MultiIndex.from_tuples(level_pairs)
In [102]:
# 6x2 frame of randn draws: rows carry the two-level hier_index, columns are a and b.
dfh = pd.DataFrame(
    data=randn(6, 2),
    index=hier_index,
    columns=('a', 'b'),
)
In [101]:
dfh # hnngggh
Out[101]:
In [100]:
dfh.loc['G2'].loc[2] # first .loc picks the outer group, second picks the inner label; dfh.loc[('G2', 2)] fetches the same row in one lookup
Out[100]:
In [104]:
dfh.index.names = ['Groups','Num'] # name the two index levels (outer, inner) so they can be referred to by name
dfh
Out[104]:
In [105]:
dfh.loc[('G2', 2), 'b'] # one lookup: (outer, inner) row key plus column label, instead of three chained selections that each build an intermediate object
Out[105]:
In [106]:
dfh.loc[('G1', 3), 'a'] # one lookup: (outer, inner) row key plus column label, instead of three chained selections that each build an intermediate object
Out[106]:
In [111]:
dfh.xs(1,level='Num') # cross-section: the rows whose 'Num' level equals 1, taken from every group; hard to express with plain .loc
Out[111]:
In [ ]: