In [1]:
import pandas as pd
import numpy as np

In [2]:
# Download the Iris data set from the UCI repository. Column labels are
# supplied explicitly through `names` instead of being read from the file.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    names=[
        'sepal length',
        'sepal width',
        'petal length',
        'petal width',
        'class',
    ],
)

In [3]:
# Preview the first five rows. Ending the cell on the bare expression lets the
# notebook render the frame as a rich table, the same way the later cells
# display `df.head()`, instead of printing its plain-text repr.
df.head()


   sepal length  sepal width  petal length  petal width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa

用 Series.apply 也可以实现 map 的功能:把一个函数逐个作用到 Series 的每个元素上。


In [4]:
def swap(original_value):
    """Translate a full Iris class label into its three-letter code.

    Args:
        original_value: The label to translate, e.g. ``'Iris-setosa'``.

    Returns:
        ``'SET'``, ``'VIR'`` or ``'VER'`` for the three known labels;
        ``None`` for any other value.
    """
    # Checked in order with `==`, one (full label, short code) pair at a time.
    abbreviations = (
        ('Iris-setosa', 'SET'),
        ('Iris-virginica', 'VIR'),
        ('Iris-versicolor', 'VER'),
    )
    for full_label, short_code in abbreviations:
        if original_value == full_label:
            return short_code
    # Unknown labels fall through and yield None.
    return None

# Run `swap` on every element of the 'class' column. The result is a new
# Series bound to `newdf`; `df` itself is not modified by this cell.
newdf = df['class'].apply(func=swap)
newdf.head()


Out[4]:
0    SET
1    SET
2    SET
3    SET
4    SET
Name: class, dtype: object

In [13]:
# Overwrite the 'class' column with three-letter codes.
# A plain `.map(dict)` turns every value that is missing from the dict into
# NaN, so running this cell a second time (when the labels are already
# abbreviated) would wipe the whole column. Falling back to the current value
# for unknown labels keeps the cell safe to re-run.
class_codes = {'Iris-setosa': 'SET', 'Iris-virginica': 'VIR', 'Iris-versicolor': 'VER'}
df['class'] = df['class'].map(lambda label: class_codes.get(label, label))
df.head()


Out[13]:
sepal length sepal width petal length petal width class
0 5.1 3.5 1.4 0.2 SET
1 4.9 3.0 1.4 0.2 SET
2 4.7 3.2 1.3 0.2 SET
3 4.6 3.1 1.5 0.2 SET
4 5.0 3.6 1.4 0.2 SET

In [15]:
# Add a 1/0 indicator column: 1 when the petal width exceeds 1.3, else 0.
# `int(...)` converts the comparison result into that 1 or 0.
df['wide petal'] = df['petal width'].apply(lambda width: int(width > 1.3))
df.head()


Out[15]:
sepal length sepal width petal length petal width class wide petal
0 5.1 3.5 1.4 0.2 SET 0
1 4.9 3.0 1.4 0.2 SET 0
2 4.7 3.2 1.3 0.2 SET 0
3 4.6 3.1 1.5 0.2 SET 0
4 5.0 3.6 1.4 0.2 SET 0

In [18]:
# Petal area = petal length * petal width, computed one row at a time:
# with axis=1 the function is called once per row and receives that row.
df['petal area'] = df.apply(
    lambda row: row['petal length'] * row['petal width'],
    axis=1,
)
df.head()


Out[18]:
sepal length sepal width petal length petal width class wide petal petal area
0 5.1 3.5 1.4 0.2 SET 0 0.28
1 4.9 3.0 1.4 0.2 SET 0 0.28
2 4.7 3.2 1.3 0.2 SET 0 0.26
3 4.6 3.1 1.5 0.2 SET 0 0.30
4 5.0 3.6 1.4 0.2 SET 0 0.28

axis=1 表明按行应用函数:lambda 的参数 r 就是 DataFrame 的一行(一个 Series),可以用列名取出该行中的值。


In [21]:
# Take the natural log of every float cell and pass every other cell through
# unchanged. `isinstance` is used instead of an exact `type(v) == float`
# comparison so that float subclasses such as numpy.float64 are transformed
# as well rather than silently skipped.
# NOTE: pandas 2.1+ deprecates `applymap` in favour of `DataFrame.map`.
df.applymap(lambda cell: np.log(cell) if isinstance(cell, float) else cell)


Out[21]:
sepal length sepal width petal length petal width class wide petal petal area
0 1.629241 1.252763 0.336472 -1.609438 SET 0 -1.272966
1 1.589235 1.098612 0.336472 -1.609438 SET 0 -1.272966
2 1.547563 1.163151 0.262364 -1.609438 SET 0 -1.347074
3 1.526056 1.131402 0.405465 -1.609438 SET 0 -1.203973
4 1.609438 1.280934 0.336472 -1.609438 SET 0 -1.272966
5 1.686399 1.360977 0.530628 -0.916291 SET 0 -0.385662
6 1.526056 1.223775 0.336472 -1.203973 SET 0 -0.867501
7 1.609438 1.223775 0.405465 -1.609438 SET 0 -1.203973
8 1.481605 1.064711 0.336472 -1.609438 SET 0 -1.272966
9 1.589235 1.131402 0.405465 -2.302585 SET 0 -1.897120
10 1.686399 1.308333 0.405465 -1.609438 SET 0 -1.203973
11 1.568616 1.223775 0.470004 -1.609438 SET 0 -1.139434
12 1.568616 1.098612 0.336472 -2.302585 SET 0 -1.966113
13 1.458615 1.098612 0.095310 -2.302585 SET 0 -2.207275
14 1.757858 1.386294 0.182322 -1.609438 SET 0 -1.427116
15 1.740466 1.481605 0.405465 -0.916291 SET 0 -0.510826
16 1.686399 1.360977 0.262364 -0.916291 SET 0 -0.653926
17 1.629241 1.252763 0.336472 -1.203973 SET 0 -0.867501
18 1.740466 1.335001 0.530628 -1.203973 SET 0 -0.673345
19 1.629241 1.335001 0.405465 -1.203973 SET 0 -0.798508
20 1.686399 1.223775 0.530628 -1.609438 SET 0 -1.078810
21 1.629241 1.308333 0.405465 -0.916291 SET 0 -0.510826
22 1.526056 1.280934 0.000000 -1.609438 SET 0 -1.609438
23 1.629241 1.193922 0.530628 -0.693147 SET 0 -0.162519
24 1.568616 1.223775 0.641854 -1.609438 SET 0 -0.967584
25 1.609438 1.098612 0.470004 -1.609438 SET 0 -1.139434
26 1.609438 1.223775 0.470004 -0.916291 SET 0 -0.446287
27 1.648659 1.252763 0.405465 -1.609438 SET 0 -1.203973
28 1.648659 1.223775 0.336472 -1.609438 SET 0 -1.272966
29 1.547563 1.163151 0.470004 -1.609438 SET 0 -1.139434
... ... ... ... ... ... ... ...
120 1.931521 1.163151 1.740466 0.832909 VIR 1 2.573375
121 1.722767 1.029619 1.589235 0.693147 VIR 1 2.282382
122 2.041220 1.029619 1.902108 0.693147 VIR 1 2.595255
123 1.840550 0.993252 1.589235 0.587787 VIR 1 2.177022
124 1.902108 1.193922 1.740466 0.741937 VIR 1 2.482404
125 1.974081 1.163151 1.791759 0.587787 VIR 1 2.379546
126 1.824549 1.029619 1.568616 0.587787 VIR 1 2.156403
127 1.808289 1.098612 1.589235 0.587787 VIR 1 2.177022
128 1.856298 1.029619 1.722767 0.741937 VIR 1 2.464704
129 1.974081 1.098612 1.757858 0.470004 VIR 1 2.227862
130 2.001480 1.029619 1.808289 0.641854 VIR 1 2.450143
131 2.066863 1.335001 1.856298 0.693147 VIR 1 2.549445
132 1.856298 1.029619 1.722767 0.788457 VIR 1 2.511224
133 1.840550 1.029619 1.629241 0.405465 VIR 1 2.034706
134 1.808289 0.955511 1.722767 0.336472 VIR 1 2.059239
135 2.041220 1.098612 1.808289 0.832909 VIR 1 2.641198
136 1.840550 1.223775 1.722767 0.875469 VIR 1 2.598235
137 1.856298 1.131402 1.704748 0.587787 VIR 1 2.292535
138 1.791759 1.098612 1.568616 0.587787 VIR 1 2.156403
139 1.931521 1.131402 1.686399 0.741937 VIR 1 2.428336
140 1.902108 1.131402 1.722767 0.875469 VIR 1 2.598235
141 1.931521 1.131402 1.629241 0.832909 VIR 1 2.462150
142 1.757858 0.993252 1.629241 0.641854 VIR 1 2.271094
143 1.916923 1.163151 1.774952 0.832909 VIR 1 2.607861
144 1.902108 1.193922 1.740466 0.916291 VIR 1 2.656757
145 1.902108 1.098612 1.648659 0.832909 VIR 1 2.481568
146 1.840550 0.916291 1.609438 0.641854 VIR 1 2.251292
147 1.871802 1.098612 1.648659 0.693147 VIR 1 2.341806
148 1.824549 1.223775 1.686399 0.832909 VIR 1 2.519308
149 1.774952 1.098612 1.629241 0.587787 VIR 1 2.217027

150 rows × 7 columns

applymap 是按单元格(cell)逐个进行操作;apply 是按列或者按行进行操作。


In [22]:
# Split the rows by their 'class' value and average each remaining column
# within every group.
df.groupby(by='class').mean()


Out[22]:
sepal length sepal width petal length petal width wide petal petal area
class
SET 5.006 3.418 1.464 0.244 0.00 0.3628
VER 5.936 2.770 4.260 1.326 0.44 5.7204
VIR 6.588 2.974 5.552 2.026 1.00 11.2962

class 列中只有三个不同的值,groupby 按这三个值分组,再分别对每一组进行汇总。


In [23]:
# Summary statistics (count, mean, std, min, quartiles, max) of each column,
# computed separately for every class.
df.groupby(by='class').describe()


Out[23]:
petal area petal length petal width sepal length sepal width wide petal
class
SET count 50.000000 50.000000 50.000000 50.000000 50.000000 50.000000
mean 0.362800 1.464000 0.244000 5.006000 3.418000 0.000000
std 0.183248 0.173511 0.107210 0.352490 0.381024 0.000000
min 0.110000 1.000000 0.100000 4.300000 2.300000 0.000000
25% 0.265000 1.400000 0.200000 4.800000 3.125000 0.000000
50% 0.300000 1.500000 0.200000 5.000000 3.400000 0.000000
75% 0.420000 1.575000 0.300000 5.200000 3.675000 0.000000
max 0.960000 1.900000 0.600000 5.800000 4.400000 0.000000
VER count 50.000000 50.000000 50.000000 50.000000 50.000000 50.000000
mean 5.720400 4.260000 1.326000 5.936000 2.770000 0.440000
std 1.368403 0.469911 0.197753 0.516171 0.313798 0.501427
min 3.300000 3.000000 1.000000 4.900000 2.000000 0.000000
25% 4.860000 4.000000 1.200000 5.600000 2.525000 0.000000
50% 5.615000 4.350000 1.300000 5.900000 2.800000 0.000000
75% 6.750000 4.600000 1.500000 6.300000 3.000000 1.000000
max 8.640000 5.100000 1.800000 7.000000 3.400000 1.000000
VIR count 50.000000 50.000000 50.000000 50.000000 50.000000 50.000000
mean 11.296200 5.552000 2.026000 6.588000 2.974000 1.000000
std 2.157412 0.551895 0.274650 0.635880 0.322497 0.000000
min 7.500000 4.500000 1.400000 4.900000 2.200000 1.000000
25% 9.717500 5.100000 1.800000 6.225000 2.800000 1.000000
50% 11.445000 5.550000 2.000000 6.500000 3.000000 1.000000
75% 12.790000 5.875000 2.300000 6.900000 3.175000 1.000000
max 15.870000 6.900000 2.500000 7.900000 3.800000 1.000000

In [26]:
# For each distinct petal width, collect the unique class codes observed at
# that width, then present the resulting Series as a one-column DataFrame.
(
    df.groupby(by='petal width')['class']
    .unique()
    .to_frame()
)


Out[26]:
class
petal width
0.1 [SET]
0.2 [SET]
0.3 [SET]
0.4 [SET]
0.5 [SET]
0.6 [SET]
1.0 [VER]
1.1 [VER]
1.2 [VER]
1.3 [VER]
1.4 [VER, VIR]
1.5 [VER, VIR]
1.6 [VER, VIR]
1.7 [VER, VIR]
1.8 [VER, VIR]
1.9 [VIR]
2.0 [VIR]
2.1 [VIR]
2.2 [VIR]
2.3 [VIR]
2.4 [VIR]
2.5 [VIR]

groupby 有一点像数据透视表(pivot table):像"乾坤大挪移"一样,把用来分组的列移到边上作为索引;原始数据本身的行数不变,只是提供了观察同一份数据的另外一个视角而已。