In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [3]:
import os
from pathlib import Path
# The project root is the parent of the directory the kernel was started in.
proj_dir = Path.cwd().parent
In [4]:
# Location of the raw CSV: <proj_dir>/data/raw/iris.csv
data_fname = proj_dir / 'data' / 'raw' / 'iris.csv'
data_fname
Out[4]:
PosixPath('/home/xevaquor/code/overcome-the-chaos/data/raw/iris.csv')
In [5]:
# header=None: every line of the file is treated as data, so pandas labels
# the columns with integers (0..4) instead of consuming the first row.
dframe = pd.read_csv(filepath_or_buffer=data_fname, header=None)
dframe
Out[5]:
0
1
2
3
4
0
5.1
3.5
1.4
0.2
Iris-setosa
1
4.9
3.0
1.4
0.2
Iris-setosa
2
4.7
3.2
1.3
0.2
Iris-setosa
3
4.6
3.1
1.5
0.2
Iris-setosa
4
5.0
3.6
1.4
0.2
Iris-setosa
5
5.4
3.9
1.7
0.4
Iris-setosa
6
4.6
3.4
1.4
0.3
Iris-setosa
7
5.0
3.4
1.5
0.2
Iris-setosa
8
4.4
2.9
1.4
0.2
Iris-setosa
9
4.9
3.1
1.5
0.1
Iris-setosa
10
5.4
3.7
1.5
0.2
Iris-setosa
11
4.8
3.4
1.6
0.2
Iris-setosa
12
4.8
3.0
1.4
0.1
Iris-setosa
13
4.3
3.0
1.1
0.1
Iris-setosa
14
5.8
4.0
1.2
0.2
Iris-setosa
15
5.7
4.4
1.5
0.4
Iris-setosa
16
5.4
3.9
1.3
0.4
Iris-setosa
17
5.1
3.5
1.4
0.3
Iris-setosa
18
5.7
3.8
1.7
0.3
Iris-setosa
19
5.1
3.8
1.5
0.3
Iris-setosa
20
5.4
3.4
1.7
0.2
Iris-setosa
21
5.1
3.7
1.5
0.4
Iris-setosa
22
4.6
3.6
1.0
0.2
Iris-setosa
23
5.1
3.3
1.7
0.5
Iris-setosa
24
4.8
3.4
1.9
0.2
Iris-setosa
25
5.0
3.0
1.6
0.2
Iris-setosa
26
5.0
3.4
1.6
0.4
Iris-setosa
27
5.2
3.5
1.5
0.2
Iris-setosa
28
5.2
3.4
1.4
0.2
Iris-setosa
29
4.7
3.2
1.6
0.2
Iris-setosa
...
...
...
...
...
...
120
6.9
3.2
5.7
2.3
Iris-virginica
121
5.6
2.8
4.9
2.0
Iris-virginica
122
7.7
2.8
6.7
2.0
Iris-virginica
123
6.3
2.7
4.9
1.8
Iris-virginica
124
6.7
3.3
5.7
2.1
Iris-virginica
125
7.2
3.2
6.0
1.8
Iris-virginica
126
6.2
2.8
4.8
1.8
Iris-virginica
127
6.1
3.0
4.9
1.8
Iris-virginica
128
6.4
2.8
5.6
2.1
Iris-virginica
129
7.2
3.0
5.8
1.6
Iris-virginica
130
7.4
2.8
6.1
1.9
Iris-virginica
131
7.9
3.8
6.4
2.0
Iris-virginica
132
6.4
2.8
5.6
2.2
Iris-virginica
133
6.3
2.8
5.1
1.5
Iris-virginica
134
6.1
2.6
5.6
1.4
Iris-virginica
135
7.7
3.0
6.1
2.3
Iris-virginica
136
6.3
3.4
5.6
2.4
Iris-virginica
137
6.4
3.1
5.5
1.8
Iris-virginica
138
6.0
3.0
4.8
1.8
Iris-virginica
139
6.9
3.1
5.4
2.1
Iris-virginica
140
6.7
3.1
5.6
2.4
Iris-virginica
141
6.9
3.1
5.1
2.3
Iris-virginica
142
5.8
2.7
5.1
1.9
Iris-virginica
143
6.8
3.2
5.9
2.3
Iris-virginica
144
6.7
3.3
5.7
2.5
Iris-virginica
145
6.7
3.0
5.2
2.3
Iris-virginica
146
6.3
2.5
5.0
1.9
Iris-virginica
147
6.5
3.0
5.2
2.0
Iris-virginica
148
6.2
3.4
5.4
2.3
Iris-virginica
149
5.9
3.0
5.1
1.8
Iris-virginica
150 rows × 5 columns
In [6]:
# Replace the integer column labels: x0..x3 for the four numeric columns,
# y for the trailing label column.
column_names = ['x0', 'x1', 'x2', 'x3', 'y']
dframe.columns = column_names
dframe.head()
Out[6]:
x0
x1
x2
x3
y
0
5.1
3.5
1.4
0.2
Iris-setosa
1
4.9
3.0
1.4
0.2
Iris-setosa
2
4.7
3.2
1.3
0.2
Iris-setosa
3
4.6
3.1
1.5
0.2
Iris-setosa
4
5.0
3.6
1.4
0.2
Iris-setosa
In [7]:
# Show the frame again, now carrying the x0..x3 / y column labels.
dframe
Out[7]:
x0
x1
x2
x3
y
0
5.1
3.5
1.4
0.2
Iris-setosa
1
4.9
3.0
1.4
0.2
Iris-setosa
2
4.7
3.2
1.3
0.2
Iris-setosa
3
4.6
3.1
1.5
0.2
Iris-setosa
4
5.0
3.6
1.4
0.2
Iris-setosa
5
5.4
3.9
1.7
0.4
Iris-setosa
6
4.6
3.4
1.4
0.3
Iris-setosa
7
5.0
3.4
1.5
0.2
Iris-setosa
8
4.4
2.9
1.4
0.2
Iris-setosa
9
4.9
3.1
1.5
0.1
Iris-setosa
10
5.4
3.7
1.5
0.2
Iris-setosa
11
4.8
3.4
1.6
0.2
Iris-setosa
12
4.8
3.0
1.4
0.1
Iris-setosa
13
4.3
3.0
1.1
0.1
Iris-setosa
14
5.8
4.0
1.2
0.2
Iris-setosa
15
5.7
4.4
1.5
0.4
Iris-setosa
16
5.4
3.9
1.3
0.4
Iris-setosa
17
5.1
3.5
1.4
0.3
Iris-setosa
18
5.7
3.8
1.7
0.3
Iris-setosa
19
5.1
3.8
1.5
0.3
Iris-setosa
20
5.4
3.4
1.7
0.2
Iris-setosa
21
5.1
3.7
1.5
0.4
Iris-setosa
22
4.6
3.6
1.0
0.2
Iris-setosa
23
5.1
3.3
1.7
0.5
Iris-setosa
24
4.8
3.4
1.9
0.2
Iris-setosa
25
5.0
3.0
1.6
0.2
Iris-setosa
26
5.0
3.4
1.6
0.4
Iris-setosa
27
5.2
3.5
1.5
0.2
Iris-setosa
28
5.2
3.4
1.4
0.2
Iris-setosa
29
4.7
3.2
1.6
0.2
Iris-setosa
...
...
...
...
...
...
120
6.9
3.2
5.7
2.3
Iris-virginica
121
5.6
2.8
4.9
2.0
Iris-virginica
122
7.7
2.8
6.7
2.0
Iris-virginica
123
6.3
2.7
4.9
1.8
Iris-virginica
124
6.7
3.3
5.7
2.1
Iris-virginica
125
7.2
3.2
6.0
1.8
Iris-virginica
126
6.2
2.8
4.8
1.8
Iris-virginica
127
6.1
3.0
4.9
1.8
Iris-virginica
128
6.4
2.8
5.6
2.1
Iris-virginica
129
7.2
3.0
5.8
1.6
Iris-virginica
130
7.4
2.8
6.1
1.9
Iris-virginica
131
7.9
3.8
6.4
2.0
Iris-virginica
132
6.4
2.8
5.6
2.2
Iris-virginica
133
6.3
2.8
5.1
1.5
Iris-virginica
134
6.1
2.6
5.6
1.4
Iris-virginica
135
7.7
3.0
6.1
2.3
Iris-virginica
136
6.3
3.4
5.6
2.4
Iris-virginica
137
6.4
3.1
5.5
1.8
Iris-virginica
138
6.0
3.0
4.8
1.8
Iris-virginica
139
6.9
3.1
5.4
2.1
Iris-virginica
140
6.7
3.1
5.6
2.4
Iris-virginica
141
6.9
3.1
5.1
2.3
Iris-virginica
142
5.8
2.7
5.1
1.9
Iris-virginica
143
6.8
3.2
5.9
2.3
Iris-virginica
144
6.7
3.3
5.7
2.5
Iris-virginica
145
6.7
3.0
5.2
2.3
Iris-virginica
146
6.3
2.5
5.0
1.9
Iris-virginica
147
6.5
3.0
5.2
2.0
Iris-virginica
148
6.2
3.4
5.4
2.3
Iris-virginica
149
5.9
3.0
5.1
1.8
Iris-virginica
150 rows × 5 columns
In [8]:
# Pairwise plot grid over the four original columns, coloured by the y label.
p = sns.pairplot(
    data=dframe,
    vars=['x0', 'x1', 'x2', 'x3'],
    hue='y',
)
In [9]:
def generate_new(df):
    """Compute the four cyclic pairwise products of the x0..x3 columns.

    The products are, in order: x0*x1, x1*x2, x2*x3, x3*x0.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Either a single record (what ``DataFrame.apply(..., axis=1)`` passes
        in) or a whole frame. It must be indexable by 'x0', 'x1', 'x2', 'x3'.

    Returns
    -------
    pandas.Series
        For a single record: the four products, positionally indexed 0..3.
    pandas.DataFrame
        For a whole frame: one column per product, labelled 0..3, with the
        same row index as the input.
    """
    pairs = (('x0', 'x1'), ('x1', 'x2'), ('x2', 'x3'), ('x3', 'x0'))
    products = [df[left] * df[right] for left, right in pairs]
    if isinstance(df, pd.DataFrame):
        # Whole-frame input: every product is already a full column, so put
        # them side by side rather than nesting four Series inside a Series.
        return pd.concat(products, axis=1, keys=list(range(len(products))))
    return pd.Series(products)
# axis=1 hands generate_new one record at a time; the result has one column
# per product, which is then labelled x4..x7.
new = dframe.apply(generate_new, axis=1)
new.columns = ['x4', 'x5', 'x6', 'x7']
# Drop any x4..x7 left over from an earlier run of this cell first: joining
# onto a frame that already has those columns raises on the overlapping
# names, which made the cell fail when executed a second time.
dframe = dframe.drop(columns=new.columns, errors='ignore').join(new)
In [10]:
# Same pairwise plot grid, this time over the derived product columns x4..x7.
sns.pairplot(
    data=dframe,
    vars=['x4', 'x5', 'x6', 'x7'],
    hue='y',
)
Out[10]:
<seaborn.axisgrid.PairGrid at 0x7f4cb61444e0>
In [ ]:
Content source: artofai/overcome-the-chaos
Similar notebooks: