In [1]:
import pandas as pd

In [2]:
iris = pd.read_csv("https://github.com/Bio204-class/bio204-datasets/raw/master/iris.csv")

In [3]:
iris.head()


Out[3]:
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa

In [4]:
iris.tail()


Out[4]:
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica

In [5]:
iris.columns


Out[5]:
Index(['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',
       'Species'],
      dtype='object')

In [6]:
type(iris)


Out[6]:
pandas.core.frame.DataFrame

In [7]:
iris.mean()


Out[7]:
Sepal.Length    5.843333
Sepal.Width     3.057333
Petal.Length    3.758000
Petal.Width     1.199333
dtype: float64

In [8]:
iris.var()


Out[8]:
Sepal.Length    0.685694
Sepal.Width     0.189979
Petal.Length    3.116278
Petal.Width     0.581006
dtype: float64

In [9]:
iris.median()


Out[9]:
Sepal.Length    5.80
Sepal.Width     3.00
Petal.Length    4.35
Petal.Width     1.30
dtype: float64

In [10]:
iris.columns


Out[10]:
Index(['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',
       'Species'],
      dtype='object')

In [11]:
iris.columns = [i.replace('.',"_") for i in iris.columns]

In [12]:
iris.columns


Out[12]:
Index(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width',
       'Species'],
      dtype='object')

In [13]:
iris.head()


Out[13]:
Sepal_Length Sepal_Width Petal_Length Petal_Width Species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa

In [14]:
iris.Sepal_Length.mean()


Out[14]:
5.843333333333335

In [15]:
iris.query("Species in ('setosa','versicolor')")


Out[15]:
Sepal_Length Sepal_Width Petal_Length Petal_Width Species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
5 5.4 3.9 1.7 0.4 setosa
6 4.6 3.4 1.4 0.3 setosa
7 5.0 3.4 1.5 0.2 setosa
8 4.4 2.9 1.4 0.2 setosa
9 4.9 3.1 1.5 0.1 setosa
10 5.4 3.7 1.5 0.2 setosa
11 4.8 3.4 1.6 0.2 setosa
12 4.8 3.0 1.4 0.1 setosa
13 4.3 3.0 1.1 0.1 setosa
14 5.8 4.0 1.2 0.2 setosa
15 5.7 4.4 1.5 0.4 setosa
16 5.4 3.9 1.3 0.4 setosa
17 5.1 3.5 1.4 0.3 setosa
18 5.7 3.8 1.7 0.3 setosa
19 5.1 3.8 1.5 0.3 setosa
20 5.4 3.4 1.7 0.2 setosa
21 5.1 3.7 1.5 0.4 setosa
22 4.6 3.6 1.0 0.2 setosa
23 5.1 3.3 1.7 0.5 setosa
24 4.8 3.4 1.9 0.2 setosa
25 5.0 3.0 1.6 0.2 setosa
26 5.0 3.4 1.6 0.4 setosa
27 5.2 3.5 1.5 0.2 setosa
28 5.2 3.4 1.4 0.2 setosa
29 4.7 3.2 1.6 0.2 setosa
... ... ... ... ... ...
70 5.9 3.2 4.8 1.8 versicolor
71 6.1 2.8 4.0 1.3 versicolor
72 6.3 2.5 4.9 1.5 versicolor
73 6.1 2.8 4.7 1.2 versicolor
74 6.4 2.9 4.3 1.3 versicolor
75 6.6 3.0 4.4 1.4 versicolor
76 6.8 2.8 4.8 1.4 versicolor
77 6.7 3.0 5.0 1.7 versicolor
78 6.0 2.9 4.5 1.5 versicolor
79 5.7 2.6 3.5 1.0 versicolor
80 5.5 2.4 3.8 1.1 versicolor
81 5.5 2.4 3.7 1.0 versicolor
82 5.8 2.7 3.9 1.2 versicolor
83 6.0 2.7 5.1 1.6 versicolor
84 5.4 3.0 4.5 1.5 versicolor
85 6.0 3.4 4.5 1.6 versicolor
86 6.7 3.1 4.7 1.5 versicolor
87 6.3 2.3 4.4 1.3 versicolor
88 5.6 3.0 4.1 1.3 versicolor
89 5.5 2.5 4.0 1.3 versicolor
90 5.5 2.6 4.4 1.2 versicolor
91 6.1 3.0 4.6 1.4 versicolor
92 5.8 2.6 4.0 1.2 versicolor
93 5.0 2.3 3.3 1.0 versicolor
94 5.6 2.7 4.2 1.3 versicolor
95 5.7 3.0 4.2 1.2 versicolor
96 5.7 2.9 4.2 1.3 versicolor
97 6.2 2.9 4.3 1.3 versicolor
98 5.1 2.5 3.0 1.1 versicolor
99 5.7 2.8 4.1 1.3 versicolor

100 rows × 5 columns


In [16]:
iris.groupby("Species").describe()


Out[16]:
Petal_Length Petal_Width Sepal_Length Sepal_Width
Species
setosa count 50.000000 50.000000 50.000000 50.000000
mean 1.462000 0.246000 5.006000 3.428000
std 0.173664 0.105386 0.352490 0.379064
min 1.000000 0.100000 4.300000 2.300000
25% 1.400000 0.200000 4.800000 3.200000
50% 1.500000 0.200000 5.000000 3.400000
75% 1.575000 0.300000 5.200000 3.675000
max 1.900000 0.600000 5.800000 4.400000
versicolor count 50.000000 50.000000 50.000000 50.000000
mean 4.260000 1.326000 5.936000 2.770000
std 0.469911 0.197753 0.516171 0.313798
min 3.000000 1.000000 4.900000 2.000000
25% 4.000000 1.200000 5.600000 2.525000
50% 4.350000 1.300000 5.900000 2.800000
75% 4.600000 1.500000 6.300000 3.000000
max 5.100000 1.800000 7.000000 3.400000
virginica count 50.000000 50.000000 50.000000 50.000000
mean 5.552000 2.026000 6.588000 2.974000
std 0.551895 0.274650 0.635880 0.322497
min 4.500000 1.400000 4.900000 2.200000
25% 5.100000 1.800000 6.225000 2.800000
50% 5.550000 2.000000 6.500000 3.000000
75% 5.875000 2.300000 6.900000 3.175000
max 6.900000 2.500000 7.900000 3.800000

In [17]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [18]:
iris.hist()
pass



In [19]:
iris.groupby("Species").Petal_Length.hist(alpha=0.5)
plt.xlabel("Petal Length (mm)")
plt.ylabel("Frequency")
plt.title("Distribution of Petal Length")
pass



In [20]:
iris.groupby("Species").Sepal_Length.hist()


Out[20]:
Species
setosa        Axes(0.125,0.125;0.775x0.775)
versicolor    Axes(0.125,0.125;0.775x0.775)
virginica     Axes(0.125,0.125;0.775x0.775)
Name: Sepal_Length, dtype: object

In [21]:
iris.boxplot()
pass


/Users/pmagwene/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
  if __name__ == '__main__':

In [22]:
matplotlib.style.use("ggplot")
iris.boxplot("Petal_Length", by="Species")
plt.ylabel("Petal Length (mm)")


Out[22]:
<matplotlib.text.Text at 0x10a5689b0>

In [23]:
import numpy as np
iris["season"] = np.random.choice(["Fall","Spring"], size=len(iris))

In [24]:
iris


Out[24]:
Sepal_Length Sepal_Width Petal_Length Petal_Width Species season
0 5.1 3.5 1.4 0.2 setosa Spring
1 4.9 3.0 1.4 0.2 setosa Fall
2 4.7 3.2 1.3 0.2 setosa Spring
3 4.6 3.1 1.5 0.2 setosa Fall
4 5.0 3.6 1.4 0.2 setosa Fall
5 5.4 3.9 1.7 0.4 setosa Spring
6 4.6 3.4 1.4 0.3 setosa Fall
7 5.0 3.4 1.5 0.2 setosa Fall
8 4.4 2.9 1.4 0.2 setosa Fall
9 4.9 3.1 1.5 0.1 setosa Spring
10 5.4 3.7 1.5 0.2 setosa Fall
11 4.8 3.4 1.6 0.2 setosa Fall
12 4.8 3.0 1.4 0.1 setosa Spring
13 4.3 3.0 1.1 0.1 setosa Spring
14 5.8 4.0 1.2 0.2 setosa Fall
15 5.7 4.4 1.5 0.4 setosa Fall
16 5.4 3.9 1.3 0.4 setosa Spring
17 5.1 3.5 1.4 0.3 setosa Fall
18 5.7 3.8 1.7 0.3 setosa Fall
19 5.1 3.8 1.5 0.3 setosa Spring
20 5.4 3.4 1.7 0.2 setosa Spring
21 5.1 3.7 1.5 0.4 setosa Spring
22 4.6 3.6 1.0 0.2 setosa Fall
23 5.1 3.3 1.7 0.5 setosa Spring
24 4.8 3.4 1.9 0.2 setosa Fall
25 5.0 3.0 1.6 0.2 setosa Fall
26 5.0 3.4 1.6 0.4 setosa Fall
27 5.2 3.5 1.5 0.2 setosa Fall
28 5.2 3.4 1.4 0.2 setosa Spring
29 4.7 3.2 1.6 0.2 setosa Spring
... ... ... ... ... ... ...
120 6.9 3.2 5.7 2.3 virginica Fall
121 5.6 2.8 4.9 2.0 virginica Spring
122 7.7 2.8 6.7 2.0 virginica Fall
123 6.3 2.7 4.9 1.8 virginica Spring
124 6.7 3.3 5.7 2.1 virginica Fall
125 7.2 3.2 6.0 1.8 virginica Fall
126 6.2 2.8 4.8 1.8 virginica Fall
127 6.1 3.0 4.9 1.8 virginica Fall
128 6.4 2.8 5.6 2.1 virginica Fall
129 7.2 3.0 5.8 1.6 virginica Fall
130 7.4 2.8 6.1 1.9 virginica Fall
131 7.9 3.8 6.4 2.0 virginica Fall
132 6.4 2.8 5.6 2.2 virginica Spring
133 6.3 2.8 5.1 1.5 virginica Spring
134 6.1 2.6 5.6 1.4 virginica Fall
135 7.7 3.0 6.1 2.3 virginica Spring
136 6.3 3.4 5.6 2.4 virginica Spring
137 6.4 3.1 5.5 1.8 virginica Fall
138 6.0 3.0 4.8 1.8 virginica Fall
139 6.9 3.1 5.4 2.1 virginica Spring
140 6.7 3.1 5.6 2.4 virginica Fall
141 6.9 3.1 5.1 2.3 virginica Fall
142 5.8 2.7 5.1 1.9 virginica Fall
143 6.8 3.2 5.9 2.3 virginica Fall
144 6.7 3.3 5.7 2.5 virginica Spring
145 6.7 3.0 5.2 2.3 virginica Spring
146 6.3 2.5 5.0 1.9 virginica Spring
147 6.5 3.0 5.2 2.0 virginica Fall
148 6.2 3.4 5.4 2.3 virginica Fall
149 5.9 3.0 5.1 1.8 virginica Spring

150 rows × 6 columns


In [ ]: