In [38]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
In [2]:
# Build a Series from a plain list; no index is passed, so the default one is used.
obj = Series(data=[4, 8, -5, 2])
In [3]:
# Display the Series built from the list [4, 8, -5, 2].
obj
Out[3]:
In [4]:
# Same idea as obj, but with explicit string labels (deliberately not in sorted order).
obj2 = Series(data=[1, 2, -4, 8], index=list('acdb'))
In [5]:
# Display obj2, which carries the explicit labels 'a', 'c', 'd', 'b'.
obj2
Out[5]:
In [6]:
# The data of obj on its own, without the index.
obj.values
Out[6]:
In [10]:
# Inspect the type of the object returned by .values.
type(obj2.values)
Out[10]:
In [12]:
# The index (label) object attached to obj2.
obj2.index
Out[12]:
In [13]:
# Inspect the type of obj2's index object.
type(obj2.index)
Out[13]:
In [14]:
# Label-based lookup: fetch the element labelled 'a'.
obj2['a']
Out[14]:
In [15]:
# Assign by label (this mutates obj2 in place), then display the updated Series.
obj2['d'] = 4
obj2
Out[15]:
In [16]:
# Boolean-mask selection: keep only the entries whose value is greater than 2.
obj2[obj2 > 2]
Out[16]:
In [17]:
# Membership test on a Series checks the index labels, not the values;
# 'b' is one of obj2's labels.
'b' in obj2
Out[17]:
In [18]:
# A dict maps naturally onto a Series: the keys become the index labels.
sdata = dict(Ohio=1234, Texas=789)
obj3 = pd.Series(data=sdata)
obj3
Out[18]:
In [19]:
# Build a Series from sdata with an explicit index. 'California' is not a key
# of sdata, so that label has no source value (it is expected to show up as
# missing / NaN in the result).
obj4 = pd.Series(sdata, index=['Ohio', 'California', 'Texas'])
In [20]:
# Display obj4 (labels 'Ohio', 'California', 'Texas').
obj4
Out[20]:
In [21]:
# Missing-value check, top-level function form.
pd.isnull(obj4)
Out[21]:
In [22]:
# Missing-value check, method form, on the same Series.
obj4.isnull()
Out[22]:
In [24]:
# Show the Series' name attribute; no name has been assigned to obj4 before this cell.
print(obj4.name)
In [25]:
# Give the Series the name 'pop' (mutates obj4 in place).
obj4.name = 'pop'
In [26]:
# Column-oriented raw data: three equal-length lists, six records in total.
data = {
    'state': ['Ohio'] * 3 + ['Nevada'] * 3,
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.8, 3.6, 2.4, 2.9, 3.2],
}
In [27]:
# Build a DataFrame from the dict of equal-length lists; each key
# ('state', 'year', 'pop') becomes a column.
frame = pd.DataFrame(data)
In [28]:
# Display the frame built from the data dict.
frame
Out[28]:
In [29]:
# Same data with the column order given explicitly. The result is only
# displayed, not stored; `frame` is unchanged.
pd.DataFrame(data, columns=['year', 'state', 'pop'])
Out[29]:
In [30]:
# Column labels of frame.
frame.columns
Out[30]:
In [32]:
# Select a single column by key.
frame['year']
Out[32]:
In [33]:
# Attribute-style access to the same 'year' column.
frame.year
Out[33]:
In [35]:
# Select the row whose index label is 1.
frame.loc[1]
Out[35]:
In [36]:
# Add a 'debt' column by assigning a single scalar (mutates frame in place).
frame['debt'] = 17.5
In [37]:
# Display frame with the new 'debt' column.
frame
Out[37]:
In [40]:
# Overwrite 'debt' with an array: np.arange(6.) supplies one float per row
# (the source lists each have six entries).
frame['debt'] = np.arange(6.)
In [41]:
# Display frame after 'debt' was replaced by the array.
frame
Out[41]:
In [42]:
# Partial replacement values, keyed by row label; labels 2, 3 and 5 are left out on purpose.
val = pd.Series({0: -1.2, 1: -1.5, 4: -1.7})
In [43]:
# Assign a Series to 'debt'. val only carries labels 0, 1 and 4, while frame
# has six rows; assignment aligns on index labels, so rows without a matching
# label are expected to end up missing (NaN). Then display the result.
frame['debt'] = val
frame
Out[43]:
In [44]:
# Add a boolean column: True where the 'state' column equals 'Ohio'.
frame['eastern'] = frame.state == 'Ohio'
In [45]:
# Display frame including the 'eastern' column.
frame
Out[45]:
In [46]:
# Remove the 'eastern' column again (in place).
del frame['eastern']
In [47]:
# Display frame after the column was deleted.
frame
Out[47]:
In [48]:
# Column labels after deleting 'eastern'.
frame.columns
Out[48]:
In [49]:
# Nested dict: state -> {year -> population}. Nevada has no entry for 2000.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
In [50]:
# Build a DataFrame from the nested dict: the outer keys ('Nevada', 'Ohio')
# become columns and the inner keys (years) become the row index.
frame3 = pd.DataFrame(pop)
In [51]:
# Display the state-by-year population frame.
frame3
Out[51]:
In [52]:
# Transposed view: rows and columns swapped.
frame3.T
Out[52]:
In [53]:
# frame3's index holds integer years, so a bare [:-1] reads ambiguously
# (label or position?). .iloc states that the slice is positional:
# everything except the last row. The result type is what is inspected here.
type(frame3['Ohio'].iloc[:-1])
Out[53]:
In [55]:
# frame3's index holds integer years, so a bare [:-1] reads ambiguously
# (label or position?). .iloc states that the slice is positional:
# all of the 'Ohio' column except its last row.
frame3['Ohio'].iloc[:-1]
Out[55]:
In [56]:
# The frame's data on its own, without row/column labels.
frame3.values
Out[56]:
In [57]:
# Keep the raw values around so their shape can be inspected below.
res = frame3.values
In [58]:
# Dimensions of the raw values.
res.shape
Out[58]:
In [59]:
# Inspect the type of the shape attribute itself.
type(res.shape)
Out[59]:
In [60]:
# 3x3 float grid (0.0 .. 8.0) with a row index that skips 'b', which sets up
# the reindexing examples that follow. Rebinds the name `frame`.
frame = pd.DataFrame(
    np.arange(9, dtype=float).reshape(3, 3),
    index=list('acd'),
    columns=['Ohio', 'Texas', 'California'],
)
In [61]:
# Display the 3x3 frame (rows 'a', 'c', 'd').
frame
Out[61]:
In [63]:
# Reindex the rows to ['a', 'b', 'c', 'd']. 'b' is not in frame's index, so
# the new row has no source data. reindex returns a new object.
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
In [64]:
# Display the row-reindexed frame.
frame2
Out[64]:
In [65]:
# Reindex the columns: 'California' is left out and 'Georgia' (not an
# existing column) is requested. Note this rebinds frame3, which previously
# held the population frame.
frame3 = frame2.reindex(columns = ['Ohio', 'Texas', 'Georgia'])
In [66]:
# Display the column-reindexed frame.
frame3
Out[66]:
In [67]:
# 4x4 integer grid (0 .. 15) labelled by state (rows) and ordinal names
# (columns), used for the selection examples below. Rebinds the name `data`,
# which earlier held a plain dict.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
In [68]:
# Display the 4x4 frame.
data
Out[68]:
In [69]:
# Label-based selection: one row label plus a list of column labels.
data.loc['Colorado', ['two', 'three']]
Out[69]:
In [70]:
# Inspect the type returned by the single-row / multi-column selection.
type(data.loc['Colorado', ['two', 'three']])
Out[70]:
In [71]:
# Position-based selection: the row at position 2 ('Utah' in this frame).
data.iloc[2]
Out[71]:
In [72]:
# All rows, first three columns by position.
data.iloc[:, :3]
Out[72]:
In [73]:
# Take the first three columns, then keep only the rows where column
# 'three' is greater than 5 (the mask is computed on the full frame).
data.iloc[:, :3][data.three > 5]
Out[73]:
In [79]:
# Scalar access by row label and column label.
data.at['Colorado', 'three']
Out[79]:
In [80]:
# Random 4x3 demo frame. The values come from a generator with a fixed seed
# so every Restart & Run All produces the same numbers; the previous
# unseeded np.random.randn call gave different output on each run.
# Rebinds the name `frame`.
frame = pd.DataFrame(
    np.random.default_rng(0).standard_normal((4, 3)),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
In [81]:
# Display the random 4x3 frame (columns 'b', 'd', 'e').
frame
Out[81]:
In [82]:
# Element-wise absolute value: a NumPy function applied directly to the frame.
np.abs(frame)
Out[82]:
In [83]:
def f(x):
    """Return the spread (maximum minus minimum) of ``x``."""
    return x.max() - x.min()
In [84]:
# Apply f with the default axis: f receives each column in turn.
frame.apply(f)
Out[84]:
In [86]:
# axis='columns': f is applied across the columns, i.e. once per row.
frame.apply(f, axis='columns')
Out[86]:
In [87]:
def f(x):
    """Return the minimum and maximum of ``x`` as a labelled Series.

    Note: this rebinds ``f``, replacing the max-minus-min function defined
    in an earlier cell.

    Parameters
    ----------
    x : object exposing ``.min()`` and ``.max()`` (e.g. a pandas Series)

    Returns
    -------
    pandas.Series
        Two entries, labelled ``'min'`` and ``'max'``.
    """
    return pd.Series([x.min(), x.max()], index=['min', 'max'])
In [88]:
# Apply the Series-returning f to each column; f yields a 'min' and a 'max'
# entry per call.
frame.apply(f)
Out[88]:
In [89]:
def format(x):
    """Render ``x`` as a string with two decimal places.

    NOTE(review): this name shadows the ``format`` builtin for the rest of
    the notebook; it is kept because later cells refer to it by this name.
    """
    return '%.2f' % x
In [90]:
# Element-wise formatting of every cell. DataFrame.applymap is deprecated in
# recent pandas (renamed to DataFrame.map); mapping each column with
# Series.map gives the same element-wise result on every pandas version.
frame.apply(lambda col: col.map(format))
Out[90]:
In [91]:
# Select column 'e' on its own.
frame['e']
Out[91]:
In [92]:
# Element-wise function application on a single column via map.
frame['e'].map(format)
Out[92]:
In [93]:
# Values 0..3 under deliberately unsorted labels, to demonstrate sort_index.
# Rebinds the name `obj`.
obj = pd.Series(data=range(4), index=list('dabc'))
In [94]:
# Display the Series in its original (unsorted) label order.
obj
Out[94]:
In [95]:
# Sort by index label; the sorted result is displayed, obj is not reassigned.
obj.sort_index()
Out[95]:
In [96]:
# Display the random frame again before sorting it.
frame
Out[96]:
In [97]:
# Sort the rows by index label (default axis).
frame.sort_index()
Out[97]:
In [98]:
# Same row sort with the axis spelled out explicitly (axis=0 is the row index).
frame.sort_index(axis=0)
Out[98]:
In [101]:
# Sort by column label instead, in descending order.
frame.sort_index(axis='columns', ascending=False)
Out[101]:
In [102]:
# Display frame again; none of the sort results above were assigned back.
frame
Out[102]:
In [103]:
# Sort the rows by the values in column 'e'.
frame.sort_values(by='e')
Out[103]:
In [104]:
# Sort by several columns: 'e' first, then 'd' to break ties.
frame.sort_values(by=['e', 'd'])
Out[104]:
In [105]:
# Small frame with deliberately placed missing values: row 'a' lacks 'two',
# and row 'c' is entirely missing. Used for the reduction examples below.
df = pd.DataFrame(
    data=[
        [1.4, np.nan],
        [7.1, -4.5],
        [np.nan, np.nan],
        [0.75, -1.3],
    ],
    index=list('abcd'),
    columns=['one', 'two'],
)
In [106]:
# Display the frame containing missing values.
df
Out[106]:
In [107]:
# Sum with the default axis: one total per column.
df.sum()
Out[107]:
In [108]:
# Sum along axis=1: one total per row.
df.sum(axis=1)
Out[108]:
In [109]:
# Transposed view of df.
df.T
Out[109]:
In [110]:
# Row sums of the transpose, i.e. one total per original column of df.
df.T.sum(axis=1)
Out[110]:
In [111]:
# Column means with default settings.
df.mean()
Out[111]:
In [112]:
# Display df again for reference while checking the mean by hand.
df
Out[112]:
In [113]:
# Hand-computed mean of the three non-missing values in column 'one'
# (1.40, 7.10, 0.75), for comparison with df.mean().
hmm = (1.40 + 7.10 + 0.75) / 3
In [114]:
# Display the hand-computed mean.
hmm
Out[114]:
In [115]:
# Column means with skipna=False: missing values are not skipped here.
df.mean(skipna=False)
Out[115]:
In [116]:
# Drop the row labelled 'c' (the all-missing row). The result is only
# displayed; df is not reassigned.
df.drop('c')
Out[116]:
In [119]:
# Mean without skipping NaN after removing row 'c'. Column 'two' still has a
# missing value in row 'a'.
df.drop('c').mean(skipna=False)
Out[119]:
In [120]:
# Display df; it still contains row 'c', since drop() was not assigned back.
df
Out[120]:
In [121]:
# Replace missing values with 0 (result displayed, df not reassigned).
df.fillna(0)
Out[121]:
In [122]:
# Column means after filling missing values with 0; the filled zeros now
# count towards the mean.
df.fillna(0).mean(skipna=False)
Out[122]:
In [123]:
# Cumulative sum down each column.
df.cumsum()
Out[123]:
In [124]:
# Store the running total of column 'one' as a new column 'three'
# (mutates df in place).
df['three'] = df['one'].cumsum()
In [125]:
# Display df with the added 'three' column.
df
Out[125]:
In [126]:
# Three columns ('Qu1'..'Qu3') of small integers with repeated values, used
# to demonstrate value counting. Rebinds the name `data`.
data = pd.DataFrame(
    dict(
        Qu1=[1, 3, 4, 3, 4],
        Qu2=[2, 3, 1, 2, 3],
        Qu3=[1, 5, 2, 4, 4],
    )
)
In [127]:
# Display the three-column frame.
data
Out[127]:
In [129]:
# How often each distinct value occurs in column 'Qu1'.
data['Qu1'].value_counts()
Out[129]:
In [137]:
# Count value occurrences in every column at once. The top-level
# pd.value_counts function is deprecated in recent pandas; the Series method
# is the supported equivalent and gives the same per-column counts.
result = data.apply(pd.Series.value_counts)
In [138]:
# Display the per-column value counts (rows are the distinct values seen in
# any column).
result
Out[138]:
In [ ]: