In [1]:
import pandas as pd
import numpy as np
In [2]:
# Load the iris measurements from the UCI repository; the column names are
# supplied explicitly through `names`.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    names=['sepal length', 'sepal width', 'petal length', 'petal width', 'class'],
)
In [3]:
# End the cell with the bare expression so the notebook renders the first
# rows with its rich table display instead of print()'s plain-text dump.
df.head()
sepal length sepal width petal length petal width class
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
用 Series.apply 也可以实现 Series.map 的功能:下面先用 apply 配合自定义函数转换类别名称,再用 map 配合字典完成同样的转换。
In [4]:
def swap(original_value):
    """Return the three-letter abbreviation for an iris class label.

    Parameters
    ----------
    original_value : str
        Label as it appears in the ``class`` column, e.g. ``'Iris-setosa'``.

    Returns
    -------
    str or None
        ``'SET'``, ``'VIR'`` or ``'VER'`` for the three known labels;
        ``None`` for any other value.
    """
    abbreviations = {
        'Iris-setosa': 'SET',
        'Iris-virginica': 'VIR',
        'Iris-versicolor': 'VER',
    }
    # dict.get makes the "unrecognised label -> None" fall-through explicit.
    return abbreviations.get(original_value)
# Run swap over every label of the class column. The result is a separate
# Series bound to newdf; df itself is not modified by this cell.
newdf = df['class'].apply(swap)
newdf.head()
Out[4]:
0 SET
1 SET
2 SET
3 SET
4 SET
Name: class, dtype: object
In [13]:
# Abbreviate the class labels in place. Labels that are not in the lookup are
# passed through unchanged, so re-running this cell is a no-op; a plain
# dict-based map would turn the already-abbreviated labels into NaN.
class_abbreviations = {
    'Iris-setosa': 'SET',
    'Iris-virginica': 'VIR',
    'Iris-versicolor': 'VER',
}
df['class'] = df['class'].map(lambda label: class_abbreviations.get(label, label))
df.head()
Out[13]:
sepal length
sepal width
petal length
petal width
class
0
5.1
3.5
1.4
0.2
SET
1
4.9
3.0
1.4
0.2
SET
2
4.7
3.2
1.3
0.2
SET
3
4.6
3.1
1.5
0.2
SET
4
5.0
3.6
1.4
0.2
SET
In [15]:
# Binary flag column: 1 when the petal width is greater than 1.3, otherwise 0.
df['wide petal'] = df['petal width'].apply(lambda width: int(width > 1.3))
df.head()
Out[15]:
sepal length
sepal width
petal length
petal width
class
wide petal
0
5.1
3.5
1.4
0.2
SET
0
1
4.9
3.0
1.4
0.2
SET
0
2
4.7
3.2
1.3
0.2
SET
0
3
4.6
3.1
1.5
0.2
SET
0
4
5.0
3.6
1.4
0.2
SET
0
In [18]:
# Row-wise apply (axis=1): the function is called once per row and returns
# that row's petal length multiplied by its petal width.
df['petal area'] = df.apply(
    lambda row: row['petal length'] * row['petal width'],
    axis=1,
)
df.head()
Out[18]:
sepal length
sepal width
petal length
petal width
class
wide petal
petal area
0
5.1
3.5
1.4
0.2
SET
0
0.28
1
4.9
3.0
1.4
0.2
SET
0
0.28
2
4.7
3.2
1.3
0.2
SET
0
0.26
3
4.6
3.1
1.5
0.2
SET
0
0.30
4
5.0
3.6
1.4
0.2
SET
0
0.28
axis=1 表示逐行应用函数:每次传给 lambda 的参数是一整行(一个以列名为索引的 Series)。
In [21]:
# Element-wise transform: take the natural log of every float cell and leave
# all other cells (the class labels and the integer wide-petal flag) as is.
# isinstance also accepts float subclasses such as numpy.float64, which an
# exact `type(v) == float` comparison rejects.
# NOTE(review): newer pandas releases deprecate DataFrame.applymap in favour
# of DataFrame.map — confirm the installed version before switching.
df.applymap(lambda cell: np.log(cell) if isinstance(cell, float) else cell)
Out[21]:
sepal length
sepal width
petal length
petal width
class
wide petal
petal area
0
1.629241
1.252763
0.336472
-1.609438
SET
0
-1.272966
1
1.589235
1.098612
0.336472
-1.609438
SET
0
-1.272966
2
1.547563
1.163151
0.262364
-1.609438
SET
0
-1.347074
3
1.526056
1.131402
0.405465
-1.609438
SET
0
-1.203973
4
1.609438
1.280934
0.336472
-1.609438
SET
0
-1.272966
5
1.686399
1.360977
0.530628
-0.916291
SET
0
-0.385662
6
1.526056
1.223775
0.336472
-1.203973
SET
0
-0.867501
7
1.609438
1.223775
0.405465
-1.609438
SET
0
-1.203973
8
1.481605
1.064711
0.336472
-1.609438
SET
0
-1.272966
9
1.589235
1.131402
0.405465
-2.302585
SET
0
-1.897120
10
1.686399
1.308333
0.405465
-1.609438
SET
0
-1.203973
11
1.568616
1.223775
0.470004
-1.609438
SET
0
-1.139434
12
1.568616
1.098612
0.336472
-2.302585
SET
0
-1.966113
13
1.458615
1.098612
0.095310
-2.302585
SET
0
-2.207275
14
1.757858
1.386294
0.182322
-1.609438
SET
0
-1.427116
15
1.740466
1.481605
0.405465
-0.916291
SET
0
-0.510826
16
1.686399
1.360977
0.262364
-0.916291
SET
0
-0.653926
17
1.629241
1.252763
0.336472
-1.203973
SET
0
-0.867501
18
1.740466
1.335001
0.530628
-1.203973
SET
0
-0.673345
19
1.629241
1.335001
0.405465
-1.203973
SET
0
-0.798508
20
1.686399
1.223775
0.530628
-1.609438
SET
0
-1.078810
21
1.629241
1.308333
0.405465
-0.916291
SET
0
-0.510826
22
1.526056
1.280934
0.000000
-1.609438
SET
0
-1.609438
23
1.629241
1.193922
0.530628
-0.693147
SET
0
-0.162519
24
1.568616
1.223775
0.641854
-1.609438
SET
0
-0.967584
25
1.609438
1.098612
0.470004
-1.609438
SET
0
-1.139434
26
1.609438
1.223775
0.470004
-0.916291
SET
0
-0.446287
27
1.648659
1.252763
0.405465
-1.609438
SET
0
-1.203973
28
1.648659
1.223775
0.336472
-1.609438
SET
0
-1.272966
29
1.547563
1.163151
0.470004
-1.609438
SET
0
-1.139434
...
...
...
...
...
...
...
...
120
1.931521
1.163151
1.740466
0.832909
VIR
1
2.573375
121
1.722767
1.029619
1.589235
0.693147
VIR
1
2.282382
122
2.041220
1.029619
1.902108
0.693147
VIR
1
2.595255
123
1.840550
0.993252
1.589235
0.587787
VIR
1
2.177022
124
1.902108
1.193922
1.740466
0.741937
VIR
1
2.482404
125
1.974081
1.163151
1.791759
0.587787
VIR
1
2.379546
126
1.824549
1.029619
1.568616
0.587787
VIR
1
2.156403
127
1.808289
1.098612
1.589235
0.587787
VIR
1
2.177022
128
1.856298
1.029619
1.722767
0.741937
VIR
1
2.464704
129
1.974081
1.098612
1.757858
0.470004
VIR
1
2.227862
130
2.001480
1.029619
1.808289
0.641854
VIR
1
2.450143
131
2.066863
1.335001
1.856298
0.693147
VIR
1
2.549445
132
1.856298
1.029619
1.722767
0.788457
VIR
1
2.511224
133
1.840550
1.029619
1.629241
0.405465
VIR
1
2.034706
134
1.808289
0.955511
1.722767
0.336472
VIR
1
2.059239
135
2.041220
1.098612
1.808289
0.832909
VIR
1
2.641198
136
1.840550
1.223775
1.722767
0.875469
VIR
1
2.598235
137
1.856298
1.131402
1.704748
0.587787
VIR
1
2.292535
138
1.791759
1.098612
1.568616
0.587787
VIR
1
2.156403
139
1.931521
1.131402
1.686399
0.741937
VIR
1
2.428336
140
1.902108
1.131402
1.722767
0.875469
VIR
1
2.598235
141
1.931521
1.131402
1.629241
0.832909
VIR
1
2.462150
142
1.757858
0.993252
1.629241
0.641854
VIR
1
2.271094
143
1.916923
1.163151
1.774952
0.832909
VIR
1
2.607861
144
1.902108
1.193922
1.740466
0.916291
VIR
1
2.656757
145
1.902108
1.098612
1.648659
0.832909
VIR
1
2.481568
146
1.840550
0.916291
1.609438
0.641854
VIR
1
2.251292
147
1.871802
1.098612
1.648659
0.693147
VIR
1
2.341806
148
1.824549
1.223775
1.686399
0.832909
VIR
1
2.519308
149
1.774952
1.098612
1.629241
0.587787
VIR
1
2.217027
150 rows × 7 columns
applymap 是按单元格(cell)逐个元素进行操作;apply 则是按整列或整行进行操作。
In [22]:
# Average of each remaining column within every class group.
df.groupby(by='class').mean()
Out[22]:
sepal length
sepal width
petal length
petal width
wide petal
petal area
class
SET
5.006
3.418
1.464
0.244
0.00
0.3628
VER
5.936
2.770
4.260
1.326
0.44
5.7204
VIR
6.588
2.974
5.552
2.026
1.00
11.2962
class 列只有三个取值,groupby 会按这三个取值把数据分成三组,再分别对每一组进行汇总统计。
In [23]:
# Summary statistics (count, mean, std, min, quartiles, max) per class group.
df.groupby(by='class').describe()
Out[23]:
petal area
petal length
petal width
sepal length
sepal width
wide petal
class
SET
count
50.000000
50.000000
50.000000
50.000000
50.000000
50.000000
mean
0.362800
1.464000
0.244000
5.006000
3.418000
0.000000
std
0.183248
0.173511
0.107210
0.352490
0.381024
0.000000
min
0.110000
1.000000
0.100000
4.300000
2.300000
0.000000
25%
0.265000
1.400000
0.200000
4.800000
3.125000
0.000000
50%
0.300000
1.500000
0.200000
5.000000
3.400000
0.000000
75%
0.420000
1.575000
0.300000
5.200000
3.675000
0.000000
max
0.960000
1.900000
0.600000
5.800000
4.400000
0.000000
VER
count
50.000000
50.000000
50.000000
50.000000
50.000000
50.000000
mean
5.720400
4.260000
1.326000
5.936000
2.770000
0.440000
std
1.368403
0.469911
0.197753
0.516171
0.313798
0.501427
min
3.300000
3.000000
1.000000
4.900000
2.000000
0.000000
25%
4.860000
4.000000
1.200000
5.600000
2.525000
0.000000
50%
5.615000
4.350000
1.300000
5.900000
2.800000
0.000000
75%
6.750000
4.600000
1.500000
6.300000
3.000000
1.000000
max
8.640000
5.100000
1.800000
7.000000
3.400000
1.000000
VIR
count
50.000000
50.000000
50.000000
50.000000
50.000000
50.000000
mean
11.296200
5.552000
2.026000
6.588000
2.974000
1.000000
std
2.157412
0.551895
0.274650
0.635880
0.322497
0.000000
min
7.500000
4.500000
1.400000
4.900000
2.200000
1.000000
25%
9.717500
5.100000
1.800000
6.225000
2.800000
1.000000
50%
11.445000
5.550000
2.000000
6.500000
3.000000
1.000000
75%
12.790000
5.875000
2.300000
6.900000
3.175000
1.000000
max
15.870000
6.900000
2.500000
7.900000
3.800000
1.000000
In [26]:
# For each distinct petal width, collect the class labels that occur with
# that width, and show the result as a one-column frame.
(
    df.groupby('petal width')['class']
    .unique()
    .to_frame()
)
Out[26]:
class
petal width
0.1
[SET]
0.2
[SET]
0.3
[SET]
0.4
[SET]
0.5
[SET]
0.6
[SET]
1.0
[VER]
1.1
[VER]
1.2
[VER]
1.3
[VER]
1.4
[VER, VIR]
1.5
[VER, VIR]
1.6
[VER, VIR]
1.7
[VER, VIR]
1.8
[VER, VIR]
1.9
[VIR]
2.0
[VIR]
2.1
[VIR]
2.2
[VIR]
2.3
[VIR]
2.4
[VIR]
2.5
[VIR]
groupby 就有点像数据透视表(pivot table),来了个乾坤大挪移:把分组列的取值移到边上去当作索引,原始 DataFrame 的行数不变。只是提供了另外一个视角而已。