In [1]:
# Bring the `pd` / `np` aliases into scope so the notebook runs from a fresh
# kernel; the cells below rely on both.
import numpy as np
import pandas as pd

# Series built from a plain list; no index is passed here.
s = pd.Series([4, 7, -5, 3])
s
Out[1]:
In [2]:
# Show the data held by the Series through its `values` attribute.
s.values
Out[2]:
In [3]:
# Inspect the Python type of the object returned by `s.values`.
type(s.values)
Out[3]:
In [4]:
# Show the index object attached to the Series.
s.index
Out[4]:
In [5]:
# Inspect the Python type of the Series' index object.
type(s.index)
Out[5]:
In [6]:
# Multiply the Series by a scalar; the result is displayed, `s` is not reassigned.
s * 2
Out[6]:
In [7]:
# Apply NumPy's exponential function to the Series.
np.exp(s)
Out[7]:
In [9]:
# Same four values as `s`, this time labelled with an explicit string index.
s2 = pd.Series(data=[4, 7, -5, 3], index=list("dbac"))
s2
Out[9]:
In [10]:
# Show the index of s2, which was built with explicit string labels.
s2.index
Out[10]:
In [11]:
# Look up a single element by its index label.
s2['a']
Out[11]:
In [12]:
# Slice by index labels rather than by integer positions
# (pandas label slices include the end label).
s2['b':'c']
Out[12]:
In [13]:
# Select several elements at once by passing a list of labels.
s2[["a", "b"]]
Out[13]:
In [14]:
# Positional lookup of the third element. s2 is labelled with strings, so the
# position is requested explicitly through `.iloc`; a bare integer key
# (`s2[2]`) relies on a positional fallback that pandas has deprecated.
s2.iloc[2]
Out[14]:
In [15]:
# Positional slice (positions 1 up to, but not including, 4), written with the
# explicit positional indexer.
s2.iloc[1:4]
Out[15]:
In [16]:
# Pick elements by position, in the order given. `.iloc` makes the positional
# intent explicit; a list of integers passed to plain `[]` on a string-labelled
# Series relies on a positional fallback that pandas has deprecated.
s2.iloc[[2, 1]]
Out[16]:
In [17]:
# Boolean-mask selection: keep only the elements for which `s2 > 0` holds.
s2[s2 > 0]
Out[17]:
In [18]:
# Two membership tests evaluated as a tuple. `in` on a Series checks the index
# labels, not the stored values.
"a" in s2, "e" in s2
Out[18]:
In [20]:
# Iterate over (label, value) pairs. `Series.iteritems()` was removed in
# pandas 2.0; `Series.items()` yields the same pairs.
for label, value in s2.items():
    print(label, value)
In [21]:
# Label slice from "d" to "a"; it follows the order in which the labels appear
# in s2's index, not alphabetical order.
s2["d":"a"]
Out[21]:
In [22]:
# Plain dict mapping a state name to a number; raw input for the Series below.
sdata = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000,
}
# Built straight from the dict, so the dict keys become the index labels.
s3 = pd.Series(data=sdata)
s3
Out[22]:
In [23]:
# Labels to align the dict against. The first label is not a key of `sdata`,
# and 'Utah' (which is a key of `sdata`) does not appear in this list.
# The state name is spelled 'California' (it was misspelled before).
states = ['California', 'Ohio', 'Oregon', 'Texas']
# Passing `index` alongside a dict selects and orders entries by these labels.
s4 = pd.Series(sdata, index=states)
s4
Out[23]:
In [24]:
# Element-wise missing-value check using the top-level function. Pointed at
# `s4` (built with a label that has no entry in `sdata`), like the neighbouring
# isnull/notnull cells; the earlier target `s` holds no missing values, so the
# check showed nothing there.
pd.isnull(s4)
Out[24]:
In [25]:
# Top-level function form of the complementary (not-missing) check on s4.
pd.notnull(s4)
Out[25]:
In [26]:
# Method form of the missing-value check on the same Series.
s4.isnull()
Out[26]:
In [27]:
# Method form of the not-missing check on the same Series.
s4.notnull()
Out[27]:
In [28]:
# Print the raw arrays behind both Series, then add them. Going through
# `.values` leaves the index out of the operation, so elements are paired by
# position even though s3 and s4 list their labels in different orders.
print(s3.values, s4.values)
s3.values + s4.values
Out[28]:
In [29]:
s3 + s4 # Utah comes out as NaN, so it looks like the sum is only computed where both Series have a value; if either side is missing, the result is NaN
Out[29]:
In [30]:
# Re-display s4 as it stands before any name attributes are assigned.
s4
Out[30]:
In [31]:
# Give the Series itself a name, then re-display it.
s4.name = "population"
s4
Out[31]:
In [32]:
# Name the index as well, then re-display the Series.
s4.index.name = "state"
s4
Out[32]:
In [33]:
# Re-display the first Series.
s
Out[33]:
In [34]:
# Show the index of `s` before it is relabelled.
s.index
Out[34]:
In [35]:
# Replace the whole index in place by assignment; four labels are supplied for
# the four elements of `s`.
s.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
s
Out[35]:
In [36]:
# Show the index again after the assignment.
s.index
Out[36]:
In [37]:
# Column-oriented input: each key becomes a column and each list holds that
# column's values row by row.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    # Ohio's three rows cover 2000-2002, matching the Ohio entries of the
    # nested `pop` dict defined later in this notebook (2000 -> 1.5,
    # 2001 -> 1.7, 2002 -> 3.6). The first year used to read 2001, which
    # produced two (Ohio, 2001) rows.
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
df = pd.DataFrame(data)
df
Out[37]:
In [38]:
# Build a frame from the same dict while stating the column order explicitly.
pd.DataFrame(data, columns=['year', 'state', 'pop'])
Out[38]:
In [39]:
# Show the dtype of each column.
df.dtypes
Out[39]:
In [40]:
# Same source dict, with an explicit column order and string row labels.
# 'debt' is requested as a column although `data` has no such key.
df2 = pd.DataFrame(
    data,
    columns=['year', 'state', 'pop', 'debt'],
    index=['one', 'two', 'three', 'four', 'five'],
)
df2
Out[40]:
In [41]:
# Select one column with bracket (key) syntax.
df["state"]
Out[41]:
In [44]:
# Compare the type of a selected column with the type of that same column
# wrapped in a Python list literal.
type(df["state"]), type([df["state"]])
Out[44]:
In [45]:
# A Python list literal whose only element is the selected column
# (this is not a DataFrame selection).
[df["state"]]
Out[45]:
In [46]:
# The same column reached through attribute access instead of brackets.
df.state
Out[46]:
In [48]:
# Fill the 'debt' column from a tuple of five values, one per row of df2.
df2['debt'] = 16.5, 16.2, 16.3, 16.7, 16.2
df2
Out[48]:
In [49]:
# Assign a single scalar to the 'debt' column; pandas repeats it for every row.
df2['debt'] = 16.5
df2
Out[49]:
In [50]:
# Assign a NumPy range of five integers (0 through 4), one per row of df2.
df2['debt'] = np.arange(5)
df2
Out[50]:
In [51]:
# Partial assignment aligned on the row labels: only 'two', 'four' and 'five'
# receive a value, and the remaining rows of 'debt' end up as NaN. The
# right-hand side is a one-dimensional Series, the natural shape for a single
# column, instead of a one-column DataFrame.
df2['debt'] = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
df2
Out[51]:
In [52]:
# Add a boolean column holding the result of comparing `state` with 'Ohio'.
df2['eastern'] = df2.state == 'Ohio'
df2
Out[52]:
In [54]:
# Remove the 'eastern' column in place with `del`, then re-display the frame.
del df2["eastern"]
df2
Out[54]:
In [55]:
# sorted() returns a new sorted list; here the result is only displayed and is
# not stored anywhere.
x = [3, 6, 1, 4]
sorted(x)
Out[55]:
In [56]:
# Display x again after the sorted() call (sorted() leaves its argument as is).
x
Out[56]:
In [57]:
# list.sort() reorders the list in place, so x itself changes this time.
x.sort()
x
Out[57]:
In [59]:
# Rebinds `s` to a new Series: floats 0.0 through 4.0 labelled 'a' through 'e'.
s = pd.Series(data=np.arange(5.), index=list("abcde"))
s
Out[59]:
In [60]:
# Drop the 'c' label and bind the result to s2, which rebinds the name used
# for an earlier Series in this notebook.
s2 = s.drop('c')
s2
Out[60]:
In [61]:
# Display `s` again after the drop() call.
s
Out[61]:
In [62]:
# Drop several labels at once by passing a list; the result is displayed only.
s.drop(["b", "c"])
Out[62]:
In [63]:
# Rebinds `df` to a 4x4 frame holding 0..15, with named rows and columns.
df = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
df
Out[63]:
In [64]:
# Drop two rows by label; with no axis argument drop() works on the row labels.
df.drop(['Colorado', 'Ohio'])
Out[64]:
In [67]:
# axis=1 makes drop() remove a column instead of a row.
df.drop('two', axis=1)
Out[67]:
In [68]:
# Drop several columns at once by passing a list of names.
df.drop(['two', 'four'], axis=1)
Out[68]:
In [69]:
# Nested dict keyed first by state, then by year.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
In [70]:
# Build a frame from the nested dict: the outer keys (states) become columns
# and the inner keys (years) become the row index.
df3 = pd.DataFrame(pop)
df3
Out[70]:
In [74]:
# Dict of Series, each cut from a df3 column with an explicit positional slice:
# all but the last Ohio row, and the first three Nevada rows.
pdata = dict(
    Ohio=df3['Ohio'].iloc[:-1],
    Nevada=df3['Nevada'].iloc[:3],
)
pd.DataFrame(pdata)
Out[74]:
In [75]:
# The frame's data exposed through its `values` attribute.
df3.values
Out[75]:
In [76]:
# Same attribute on df2, whose columns hold different kinds of data
# (integers, strings and floats).
df2.values
Out[76]:
In [61]:
# NOTE(review): same expression as the earlier `df3.values` cell; the
# out-of-sequence execution count suggests a leftover duplicate. Confirm and
# remove if it is not needed.
df3.values
Out[61]:
In [62]:
# NOTE(review): same expression as the earlier `df2.values` cell; the
# out-of-sequence execution count suggests a leftover duplicate. Confirm and
# remove if it is not needed.
df2.values
Out[62]:
In [77]:
# Re-display df2 before the column-selection examples.
df2
Out[77]:
In [78]:
# Select a single column with bracket syntax.
df2["year"]
Out[78]:
In [79]:
# The same column through attribute access.
df2.year
Out[79]:
In [80]:
# A list of names selects several columns, in the order given.
df2[["state", "debt", "year"]]
Out[80]:
In [81]:
# A one-element list still uses list selection, so this returns a one-column
# DataFrame rather than the Series that df2["year"] gives.
df2[["year"]]
Out[81]: