In [1]:
import pandas as pd
In [2]:
iris = pd.read_csv("https://github.com/Bio204-class/bio204-datasets/raw/master/iris.csv")
In [3]:
iris.head()
Out[3]:
Sepal.Length
Sepal.Width
Petal.Length
Petal.Width
Species
0
5.1
3.5
1.4
0.2
setosa
1
4.9
3.0
1.4
0.2
setosa
2
4.7
3.2
1.3
0.2
setosa
3
4.6
3.1
1.5
0.2
setosa
4
5.0
3.6
1.4
0.2
setosa
In [4]:
iris.tail()
Out[4]:
Sepal.Length
Sepal.Width
Petal.Length
Petal.Width
Species
145
6.7
3.0
5.2
2.3
virginica
146
6.3
2.5
5.0
1.9
virginica
147
6.5
3.0
5.2
2.0
virginica
148
6.2
3.4
5.4
2.3
virginica
149
5.9
3.0
5.1
1.8
virginica
In [5]:
iris.columns
Out[5]:
Index(['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',
'Species'],
dtype='object')
In [6]:
type(iris)
Out[6]:
pandas.core.frame.DataFrame
In [7]:
iris.mean()
Out[7]:
Sepal.Length 5.843333
Sepal.Width 3.057333
Petal.Length 3.758000
Petal.Width 1.199333
dtype: float64
In [8]:
iris.var()
Out[8]:
Sepal.Length 0.685694
Sepal.Width 0.189979
Petal.Length 3.116278
Petal.Width 0.581006
dtype: float64
In [9]:
iris.median()
Out[9]:
Sepal.Length 5.80
Sepal.Width 3.00
Petal.Length 4.35
Petal.Width 1.30
dtype: float64
In [10]:
iris.columns
Out[10]:
Index(['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',
'Species'],
dtype='object')
In [11]:
iris.columns = [i.replace('.',"_") for i in iris.columns]
In [12]:
iris.columns
Out[12]:
Index(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width',
'Species'],
dtype='object')
In [13]:
iris.head()
Out[13]:
Sepal_Length
Sepal_Width
Petal_Length
Petal_Width
Species
0
5.1
3.5
1.4
0.2
setosa
1
4.9
3.0
1.4
0.2
setosa
2
4.7
3.2
1.3
0.2
setosa
3
4.6
3.1
1.5
0.2
setosa
4
5.0
3.6
1.4
0.2
setosa
In [14]:
iris.Sepal_Length.mean()
Out[14]:
5.843333333333335
In [15]:
iris.query("Species in ('setosa','versicolor')")
Out[15]:
Sepal_Length
Sepal_Width
Petal_Length
Petal_Width
Species
0
5.1
3.5
1.4
0.2
setosa
1
4.9
3.0
1.4
0.2
setosa
2
4.7
3.2
1.3
0.2
setosa
3
4.6
3.1
1.5
0.2
setosa
4
5.0
3.6
1.4
0.2
setosa
5
5.4
3.9
1.7
0.4
setosa
6
4.6
3.4
1.4
0.3
setosa
7
5.0
3.4
1.5
0.2
setosa
8
4.4
2.9
1.4
0.2
setosa
9
4.9
3.1
1.5
0.1
setosa
10
5.4
3.7
1.5
0.2
setosa
11
4.8
3.4
1.6
0.2
setosa
12
4.8
3.0
1.4
0.1
setosa
13
4.3
3.0
1.1
0.1
setosa
14
5.8
4.0
1.2
0.2
setosa
15
5.7
4.4
1.5
0.4
setosa
16
5.4
3.9
1.3
0.4
setosa
17
5.1
3.5
1.4
0.3
setosa
18
5.7
3.8
1.7
0.3
setosa
19
5.1
3.8
1.5
0.3
setosa
20
5.4
3.4
1.7
0.2
setosa
21
5.1
3.7
1.5
0.4
setosa
22
4.6
3.6
1.0
0.2
setosa
23
5.1
3.3
1.7
0.5
setosa
24
4.8
3.4
1.9
0.2
setosa
25
5.0
3.0
1.6
0.2
setosa
26
5.0
3.4
1.6
0.4
setosa
27
5.2
3.5
1.5
0.2
setosa
28
5.2
3.4
1.4
0.2
setosa
29
4.7
3.2
1.6
0.2
setosa
...
...
...
...
...
...
70
5.9
3.2
4.8
1.8
versicolor
71
6.1
2.8
4.0
1.3
versicolor
72
6.3
2.5
4.9
1.5
versicolor
73
6.1
2.8
4.7
1.2
versicolor
74
6.4
2.9
4.3
1.3
versicolor
75
6.6
3.0
4.4
1.4
versicolor
76
6.8
2.8
4.8
1.4
versicolor
77
6.7
3.0
5.0
1.7
versicolor
78
6.0
2.9
4.5
1.5
versicolor
79
5.7
2.6
3.5
1.0
versicolor
80
5.5
2.4
3.8
1.1
versicolor
81
5.5
2.4
3.7
1.0
versicolor
82
5.8
2.7
3.9
1.2
versicolor
83
6.0
2.7
5.1
1.6
versicolor
84
5.4
3.0
4.5
1.5
versicolor
85
6.0
3.4
4.5
1.6
versicolor
86
6.7
3.1
4.7
1.5
versicolor
87
6.3
2.3
4.4
1.3
versicolor
88
5.6
3.0
4.1
1.3
versicolor
89
5.5
2.5
4.0
1.3
versicolor
90
5.5
2.6
4.4
1.2
versicolor
91
6.1
3.0
4.6
1.4
versicolor
92
5.8
2.6
4.0
1.2
versicolor
93
5.0
2.3
3.3
1.0
versicolor
94
5.6
2.7
4.2
1.3
versicolor
95
5.7
3.0
4.2
1.2
versicolor
96
5.7
2.9
4.2
1.3
versicolor
97
6.2
2.9
4.3
1.3
versicolor
98
5.1
2.5
3.0
1.1
versicolor
99
5.7
2.8
4.1
1.3
versicolor
100 rows × 5 columns
In [16]:
iris.groupby("Species").describe()
Out[16]:
Petal_Length
Petal_Width
Sepal_Length
Sepal_Width
Species
setosa
count
50.000000
50.000000
50.000000
50.000000
mean
1.462000
0.246000
5.006000
3.428000
std
0.173664
0.105386
0.352490
0.379064
min
1.000000
0.100000
4.300000
2.300000
25%
1.400000
0.200000
4.800000
3.200000
50%
1.500000
0.200000
5.000000
3.400000
75%
1.575000
0.300000
5.200000
3.675000
max
1.900000
0.600000
5.800000
4.400000
versicolor
count
50.000000
50.000000
50.000000
50.000000
mean
4.260000
1.326000
5.936000
2.770000
std
0.469911
0.197753
0.516171
0.313798
min
3.000000
1.000000
4.900000
2.000000
25%
4.000000
1.200000
5.600000
2.525000
50%
4.350000
1.300000
5.900000
2.800000
75%
4.600000
1.500000
6.300000
3.000000
max
5.100000
1.800000
7.000000
3.400000
virginica
count
50.000000
50.000000
50.000000
50.000000
mean
5.552000
2.026000
6.588000
2.974000
std
0.551895
0.274650
0.635880
0.322497
min
4.500000
1.400000
4.900000
2.200000
25%
5.100000
1.800000
6.225000
2.800000
50%
5.550000
2.000000
6.500000
3.000000
75%
5.875000
2.300000
6.900000
3.175000
max
6.900000
2.500000
7.900000
3.800000
In [17]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
In [18]:
iris.hist()
pass
In [19]:
iris.groupby("Species").Petal_Length.hist(alpha=0.5)
plt.xlabel("Petal Length (mm)")
plt.ylabel("Frequency")
plt.title("Distribution of Petal Length")
pass
In [20]:
iris.groupby("Species").Sepal_Length.hist()
Out[20]:
Species
setosa Axes(0.125,0.125;0.775x0.775)
versicolor Axes(0.125,0.125;0.775x0.775)
virginica Axes(0.125,0.125;0.775x0.775)
Name: Sepal_Length, dtype: object
In [21]:
iris.boxplot()
pass
/Users/pmagwene/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning:
The default value for 'return_type' will change to 'axes' in a future release.
To use the future behavior now, set return_type='axes'.
To keep the previous behavior and silence this warning, set return_type='dict'.
if __name__ == '__main__':
In [22]:
matplotlib.style.use("ggplot")
iris.boxplot("Petal_Length", by="Species")
plt.ylabel("Petal Length (mm)")
Out[22]:
<matplotlib.text.Text at 0x10a5689b0>
In [23]:
import numpy as np
iris["season"] = np.random.choice(["Fall","Spring"], size=len(iris))
In [24]:
iris
Out[24]:
Sepal_Length
Sepal_Width
Petal_Length
Petal_Width
Species
season
0
5.1
3.5
1.4
0.2
setosa
Spring
1
4.9
3.0
1.4
0.2
setosa
Fall
2
4.7
3.2
1.3
0.2
setosa
Spring
3
4.6
3.1
1.5
0.2
setosa
Fall
4
5.0
3.6
1.4
0.2
setosa
Fall
5
5.4
3.9
1.7
0.4
setosa
Spring
6
4.6
3.4
1.4
0.3
setosa
Fall
7
5.0
3.4
1.5
0.2
setosa
Fall
8
4.4
2.9
1.4
0.2
setosa
Fall
9
4.9
3.1
1.5
0.1
setosa
Spring
10
5.4
3.7
1.5
0.2
setosa
Fall
11
4.8
3.4
1.6
0.2
setosa
Fall
12
4.8
3.0
1.4
0.1
setosa
Spring
13
4.3
3.0
1.1
0.1
setosa
Spring
14
5.8
4.0
1.2
0.2
setosa
Fall
15
5.7
4.4
1.5
0.4
setosa
Fall
16
5.4
3.9
1.3
0.4
setosa
Spring
17
5.1
3.5
1.4
0.3
setosa
Fall
18
5.7
3.8
1.7
0.3
setosa
Fall
19
5.1
3.8
1.5
0.3
setosa
Spring
20
5.4
3.4
1.7
0.2
setosa
Spring
21
5.1
3.7
1.5
0.4
setosa
Spring
22
4.6
3.6
1.0
0.2
setosa
Fall
23
5.1
3.3
1.7
0.5
setosa
Spring
24
4.8
3.4
1.9
0.2
setosa
Fall
25
5.0
3.0
1.6
0.2
setosa
Fall
26
5.0
3.4
1.6
0.4
setosa
Fall
27
5.2
3.5
1.5
0.2
setosa
Fall
28
5.2
3.4
1.4
0.2
setosa
Spring
29
4.7
3.2
1.6
0.2
setosa
Spring
...
...
...
...
...
...
...
120
6.9
3.2
5.7
2.3
virginica
Fall
121
5.6
2.8
4.9
2.0
virginica
Spring
122
7.7
2.8
6.7
2.0
virginica
Fall
123
6.3
2.7
4.9
1.8
virginica
Spring
124
6.7
3.3
5.7
2.1
virginica
Fall
125
7.2
3.2
6.0
1.8
virginica
Fall
126
6.2
2.8
4.8
1.8
virginica
Fall
127
6.1
3.0
4.9
1.8
virginica
Fall
128
6.4
2.8
5.6
2.1
virginica
Fall
129
7.2
3.0
5.8
1.6
virginica
Fall
130
7.4
2.8
6.1
1.9
virginica
Fall
131
7.9
3.8
6.4
2.0
virginica
Fall
132
6.4
2.8
5.6
2.2
virginica
Spring
133
6.3
2.8
5.1
1.5
virginica
Spring
134
6.1
2.6
5.6
1.4
virginica
Fall
135
7.7
3.0
6.1
2.3
virginica
Spring
136
6.3
3.4
5.6
2.4
virginica
Spring
137
6.4
3.1
5.5
1.8
virginica
Fall
138
6.0
3.0
4.8
1.8
virginica
Fall
139
6.9
3.1
5.4
2.1
virginica
Spring
140
6.7
3.1
5.6
2.4
virginica
Fall
141
6.9
3.1
5.1
2.3
virginica
Fall
142
5.8
2.7
5.1
1.9
virginica
Fall
143
6.8
3.2
5.9
2.3
virginica
Fall
144
6.7
3.3
5.7
2.5
virginica
Spring
145
6.7
3.0
5.2
2.3
virginica
Spring
146
6.3
2.5
5.0
1.9
virginica
Spring
147
6.5
3.0
5.2
2.0
virginica
Fall
148
6.2
3.4
5.4
2.3
virginica
Fall
149
5.9
3.0
5.1
1.8
virginica
Spring
150 rows × 6 columns
In [ ]:
Content source: Bio204-class/bio204-notebooks
Similar notebooks: