In [ ]:
%run setup_env.py
%matplotlib inline
In [ ]:
log.info('creating Series')
a = pd.Series((1,2,4,np.nan,5,9))
display_pretty(a)
log.info('creating DataFrame using index and columns')
dates = pd.date_range('20140301', periods=6, freq='M')
df = pd.DataFrame(np.random.rand(6, 4), index=dates, columns=list('ABCD'))
display_pretty(df)
log.info('creating DataFrame using dict')
df2 = pd.DataFrame({
'C1': 1,
'C2': np.arange(4),
'C3': pd.Series(1, index=range(4)),
'C4': pd.Categorical(['A', 'B', 'C', 'D']),
'C5': 'Ubuntu',
'C6': 'OK',
})
display_pretty(df2)
df['A']
df[:3]
df['20140301':'20140501']
df.loc[dates[0]]
df.loc[:, ['A', 'B']]
df.loc['20140331':'20140531', ['A', 'B']]
df.loc['20140331', ['A', 'B']]
df.loc[dates[0], 'A']
df.at[dates[0], 'A']
df.iloc[3]
df.iloc[3:5, 0:2]
df.iloc[:, [0, 3]]
df.iloc[1,1]
df.iat[1,1]
df[df.A > 0]
df[df > 0]
df2[df2['C4'].isin(['A'])]
df.dropna(how='any')
df.fillna(value=3)
df.isnull(df)
df.mean()
df.mean(1)
df.sub #
df.apply(np.cumsum)
df.apply(lambda x: x.max() - x.min())
s = pd.Series(np.random.randint(0, 7, 10))
s.value_counts()
df2['C4'].str.lower()
df = pd.DataFrame(np.random.randn(10, 4))
pieces = [df[:3], df[3:7], df[7:]]
pd.concat(pieces)
In [ ]:
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})
pd.merge(left, right, on='key')
In [ ]:
df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])
df.append(df.iloc[3], ignore_index=True)
In [ ]:
df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
'foo', 'bar', 'foo', 'foo'],
'B' : ['one', 'one', 'two', 'three',
'two', 'two', 'one', 'three'],
'C' : np.random.randn(8),
'D' : np.random.randn(8)})
df.groupby('A').sum()
In [ ]:
df.groupby(['A', 'B']).sum()
In [ ]:
df2 = df.groupby(['A', 'B']).sum()
stacked = df2.stack()
log.info('stacke')
display_pretty(stacked)
log.info('unstack')
display_pretty(stacked.unstack())
log.info('unstack 1')
display_pretty(stacked.unstack(1))
log.info('unstack 0')
display_pretty(stacked.unstack(0))
In [ ]:
df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
'B' : ['A', 'B', 'C'] * 4,
'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
'D' : np.random.randn(12),
'E' : np.random.randn(12)})
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])
In [ ]:
rng = pd.date_range('2014/1/1', periods=100, freq='S')
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts.resample('5Min', how='sum')
ts.to_period('M')
In [ ]:
ts = pd.Series(np.random.randn(1000), index=pd.date_range('2014/1/1', periods=1000))
ts.cumsum().plot()
In [ ]:
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=['A', 'B', 'C', 'D'])
df = df.cumsum()
plt.figure(); df.plot(); plt.legend(loc='best')