In [1]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
In [10]:
import pandas_datareader as pdr
In [13]:
# Collect one pdr.get_data_yahoo(...) result per ticker symbol, keyed by symbol.
# NOTE(review): presumably each call fetches data over the network via
# pandas_datareader, so this cell needs connectivity to run — confirm.
all_data = {}
for ticker in ['IBM', 'MSFT', 'GOOG']:
    # The loop body must be indented; without it the cell does not parse.
    all_data[ticker] = pdr.get_data_yahoo(ticker)
all_data
In [14]:
# One-character strings, in order, with repeats; used by the following cells.
obj = Series([letter for letter in 'cadaabbcc'])
In [15]:
# Show the distinct values found in obj.
obj.unique()
Out[15]:
In [16]:
# Show how many times each distinct value occurs in obj.
obj.value_counts()
Out[16]:
In [17]:
# Boolean Series: True wherever the element of obj is 'b' or 'c'.
mask = obj.isin(list('bc'))
mask
Out[17]:
In [18]:
# Keep only the elements of obj flagged True in mask.
obj.loc[mask]
Out[18]:
In [19]:
# Small integer frame with three columns (Qu1..Qu3) and five rows.
df = DataFrame(dict(
    Qu1=[1, 3, 4, 3, 4],
    Qu2=[2, 3, 1, 2, 3],
    Qu3=[1, 5, 2, 4, 4],
))
df
Out[19]:
In [20]:
# Per-column frequency table: each column of df is replaced by the counts of
# its values. A value that never occurs in a given column has no count there,
# so the gap left after the columns are combined is filled with 0.
# Series.value_counts is used because the top-level pd.value_counts function
# is deprecated in recent pandas releases.
result = df.apply(pd.Series.value_counts).fillna(0)
result
Out[20]:
In [21]:
# Strings with one missing entry (np.nan) in the third position.
string_data = Series(
    data=['aardvark', 'artichoke', np.nan, 'avocado'],
)
string_data
Out[21]:
In [22]:
# Flag the missing entries: True where string_data holds NaN.
string_data.isna()
Out[22]:
In [23]:
# Numeric Series with two missing entries; dropna() displays it without them.
data = Series([1.0, np.nan, 3.5, np.nan, 7.0])
data.dropna()
Out[23]:
In [24]:
# Same filtering done with a boolean mask: keep entries that are not NaN.
data.loc[data.notna()]
Out[24]:
In [27]:
# 4x3 frame with NaNs: one complete row, two partial rows, one all-NaN row.
data = DataFrame(
    data=[
        [1.0, 6.5, 3.0],
        [1.0, np.nan, np.nan],
        [np.nan, np.nan, np.nan],
        [np.nan, 6.5, 3.0],
    ]
)
# With no arguments dropna removes every row that contains any NaN.
cleaned = data.dropna(axis='index')
cleaned
Out[27]:
In [28]:
# Display data again: dropna() returned a new frame (bound to `cleaned`),
# so data itself still contains its NaN rows.
data
Out[28]:
In [29]:
# how='all' drops only the rows in which every value is NaN.
data.dropna(how = 'all')
Out[29]:
In [30]:
# Add a column labelled 4 that contains only NaN, then display the frame.
data[4] = np.nan
data
Out[30]:
In [33]:
# Work along columns instead of rows: drop the columns that are entirely NaN.
data.dropna(how='all', axis='columns')
Out[33]:
In [35]:
# 7x3 frame of standard-normal draws (unseeded, so values differ per run).
df = DataFrame(np.random.standard_normal(size=(7, 3)))
df
Out[35]:
In [37]:
# Blank out part of df so the NaN-handling cells have something to work on.
# .ix no longer exists in current pandas; .loc is the label-based replacement.
# .loc slices include the end label, so with integer row labels this sets
# rows 0..4 of column 1 and rows 0..2 of column 2 to NaN.
df.loc[:4, 1] = np.nan
df.loc[:2, 2] = np.nan
df
Out[37]:
In [41]:
# Keep only the rows that have at least `thresh` (here 3) non-NaN values.
df.dropna(thresh = 3)
Out[41]:
In [43]:
# Show df with every NaN replaced by 0; the result is not assigned, so df keeps its NaNs.
df.fillna(0)
Out[43]:
In [44]:
# Per-column fill: the dict maps a column label to the value used for that column's NaNs.
# NOTE(review): if df is still the three-column frame built earlier (labels 0-2),
# there is no column 3 and the -1 entry has no effect — confirm whether 2 was intended.
df.fillna({1 : 0.5, 3 : -1})
Out[44]:
In [46]:
# inplace = True makes fillna modify df itself instead of returning a new frame,
# so df holds zeros in place of NaN from this cell onward.
df.fillna(0, inplace = True)
df
Out[46]:
In [47]:
# Fresh 6x3 frame of standard-normal draws with trailing NaNs, used by the
# forward-fill cells that follow.
df = DataFrame(np.random.randn(6, 3))
# .ix no longer exists in current pandas; .loc is the label-based replacement.
# Open-ended label slices: rows 2 onward of column 1, rows 4 onward of column 2.
df.loc[2:, 1] = np.nan
df.loc[4:, 2] = np.nan
df
Out[47]:
In [48]:
# Forward fill: each NaN takes the last non-NaN value above it in its column.
# ffill() is used because the `method` argument of fillna is deprecated.
df.ffill()
Out[48]:
In [50]:
# Forward fill, but fill at most 2 consecutive NaNs per gap; later ones stay NaN.
# ffill() is used because the `method` argument of fillna is deprecated.
df.ffill(limit=2)
Out[50]:
In [51]:
# df.mean() gives one value per column; passing it to fillna fills each
# column's NaNs with that column's own mean.
df.fillna(df.mean())
Out[51]:
In [52]:
# Ten standard-normal draws indexed by two levels: a letter (outer) and an
# integer (inner). Unseeded, so the values differ per run.
data = Series(
    np.random.standard_normal(10),
    index=pd.MultiIndex.from_arrays([
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ]),
)
data
Out[52]:
In [53]:
# Inspect the index object that was built from the two label lists.
data.index
Out[53]:
In [55]:
# Partial indexing on the outer level: all entries whose first label is 'b'.
# .loc replaces .ix, which no longer exists in current pandas.
data.loc['b']
Out[55]:
In [57]:
# Label slice on the outer level: everything from 'b' through 'c', both included.
data.loc['b':'c']
Out[57]:
In [58]:
# Pick several outer-level labels at once by passing a list.
data.loc[['b', 'd']]
Out[58]:
In [59]:
# Select on the inner level: every outer label, inner label equal to 2.
data.loc[:, 2]
Out[59]:
In [60]:
# Pivot the inner index level into columns, turning the Series into a DataFrame.
data.unstack()
Out[60]:
In [61]:
# Round trip: stack() folds the columns produced by unstack() back into an inner index level.
data.unstack().stack()
Out[61]:
In [62]:
# 4x3 frame holding 0..11, with two-level labels on both the rows and the columns.
frame = DataFrame(
    np.arange(12).reshape(4, 3),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[
        ['Ohio', 'Ohio', 'Colorado'],
        ['Green', 'Red', 'Green'],
    ],
)
frame
Out[62]:
In [63]:
# Name the levels of the row index and of the column index so that later
# cells can refer to them by name (e.g. 'key1', 'key2', 'color').
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame
Out[63]:
In [65]:
# Partial column indexing on the outer column level: the columns grouped under 'Ohio'.
frame['Ohio']
Out[65]:
In [66]:
# Exchange the two row-index levels (key1 <-> key2); the result is only displayed, not assigned.
frame.swaplevel('key1', 'key2')
Out[66]:
In [67]:
# Sort the rows by the second index level (position 1).
# sort_index(level=...) replaces sortlevel, which no longer exists in current pandas.
frame.sort_index(level=1)
Out[67]:
In [68]:
# Swap the two row-index levels, then sort by the new outer level (position 0).
# sort_index(level=...) replaces sortlevel, which no longer exists in current pandas.
frame.swaplevel('key1', 'key2').sort_index(level=0)
Out[68]:
In [69]:
# Sum the rows that share the same 'key2' label.
# The `level` argument of sum() was removed from pandas; grouping on the index
# level and summing is the documented replacement.
frame.groupby(level='key2').sum()
Out[69]:
In [70]:
# Sum the columns that share the same 'color' label.
# The `level` argument of sum() was removed from pandas, and grouping along
# axis=1 is deprecated, so the frame is transposed, grouped on the 'color'
# level of what is now the row index, summed, and transposed back.
frame.T.groupby(level='color').sum().T
Out[70]:
In [75]:
# Flat seven-row frame: 'a' counts up from 0, 'b' counts down from 7,
# 'c' and 'd' hold the labels used as index keys in the following cells.
frame = DataFrame(dict(
    a=np.arange(7),
    b=np.arange(7, 0, -1),
    c=3 * ['one'] + 4 * ['two'],
    d=[0, 1, 2, 0, 1, 2, 3],
))
frame
Out[75]:
In [76]:
# Show a new frame whose row index is built from columns 'c' and 'd';
# by default those columns are removed from the data.
frame.set_index(['c', 'd'])
Out[76]:
In [77]:
# drop=False keeps 'c' and 'd' as ordinary columns in addition to using them as the index.
frame.set_index(['c', 'd'], drop=False)
Out[77]:
In [78]:
# frame itself is unchanged: the set_index results were displayed but never assigned back.
frame
Out[78]:
In [ ]: