In [1]:
# ==  Basic import == #
# plot within the notebook
%matplotlib inline
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and runtime
# warnings that could point at real problems -- consider narrowing it.
import warnings
warnings.filterwarnings('ignore')

Open a file, read the data and create your numpy array


In [2]:
import numpy as np

In [3]:
# Read the whole CSV file into a list of lines (without line endings).
# The `with` block guarantees the file handle is closed as soon as the
# content has been read, instead of leaking it until garbage collection.
with open("data/iris.csv") as iris_file:
    datafile = iris_file.read().splitlines()

In [4]:
# The first line of the file holds the comma-separated column names;
# split it and keep the names as a numpy array.
entires = np.array(datafile[0].split(","))

In [5]:
# Display the column names parsed from the first line of the file.
entires


Out[5]:
array(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'], 
      dtype='|S12')

In [6]:
# One independent, empty list per column of the file.
# DON'T write [[]]*5 here: that repeats a reference to one single list
# five times, so appending to any "column" would append to all of them.
sepal_length = []
sepal_width = []
petal_length = []
petal_width = []
species = []

In [7]:
# Check that the list starts out empty.
sepal_length


Out[7]:
[]

In [8]:
# Split every data row (all lines after the header line) into its five
# comma-separated fields and collect them column by column.
#
# The values are accumulated in fresh local lists so that this cell can be
# re-run on its own: appending directly to `sepal_length` & co. fails on a
# second run, because those names are rebound to numpy arrays below and
# arrays have no `.append` method.
sepal_length_values, sepal_width_values = [], []
petal_length_values, petal_width_values = [], []
species_values = []

for dd in datafile[1:]:
    if not dd.strip():
        # Tolerate blank lines (e.g. at the end of the file) instead of
        # failing on the five-way unpacking below.
        continue
    sepal_length_, sepal_width_, petal_length_, petal_width_, species_ = dd.split(",")
    sepal_length_values.append(sepal_length_)
    sepal_width_values.append(sepal_width_)
    petal_length_values.append(petal_length_)
    petal_width_values.append(petal_width_)
    species_values.append(species_)

# Turn each column into a numpy array. The values are kept as strings,
# exactly as they were read from the file.
sepal_length = np.asarray(sepal_length_values)
sepal_width = np.asarray(sepal_width_values)
petal_length = np.asarray(petal_length_values)
petal_width = np.asarray(petal_width_values)
species = np.asarray(species_values)

In [9]:
# Distinct species labels found in the data (np.unique returns the sorted
# unique values of the array).
species_unique = np.unique(species)

Or use a list comprehension


In [13]:
# Same parsing in one step: split each data row into its fields, stack the
# rows into a 2-D array of strings, and transpose it so that every column of
# the file can be unpacked into its own 1-D array.
# Blank lines are skipped; they would otherwise produce rows of unequal
# length and break the transpose/unpacking.
data_rows = [dd.split(",") for dd in datafile[1:] if dd.strip()]
sepal_length, sepal_width, petal_length, petal_width, species = np.asarray(data_rows).T

Let's analyze things by species


In [31]:
# Boolean-mask indexing: keep only the sepal lengths of the rows whose
# species is "setosa".
sepal_length[species=="setosa"]


Out[31]:
array(['5.1', '4.9', '4.7', '4.6', '5.0', '5.4', '4.6', '5.0', '4.4',
       '4.9', '5.4', '4.8', '4.8', '4.3', '5.8', '5.7', '5.4', '5.1',
       '5.7', '5.1', '5.4', '5.1', '4.6', '5.1', '4.8', '5.0', '5.0',
       '5.2', '5.2', '4.7', '4.8', '5.4', '5.2', '5.5', '4.9', '5.0',
       '5.5', '4.9', '4.4', '5.1', '5.0', '4.5', '4.4', '5.0', '5.1',
       '4.8', '5.1', '4.6', '5.3', '5.0', '5.1', '4.9', '4.7', '4.6',
       '5.0', '5.4', '4.6', '5.0', '4.4', '4.9', '5.4', '4.8', '4.8',
       '4.3', '5.8', '5.7', '5.4', '5.1', '5.7', '5.1', '5.4', '5.1',
       '4.6', '5.1', '4.8', '5.0', '5.0', '5.2', '5.2', '4.7', '4.8',
       '5.4', '5.2', '5.5', '4.9', '5.0', '5.5', '4.9', '4.4', '5.1',
       '5.0', '4.5', '4.4', '5.0', '5.1', '4.8', '5.1', '4.6', '5.3', '5.0'], 
      dtype='|S3')

In [33]:
# Same boolean-mask selection, this time for the "versicolor" rows.
sepal_length[species=="versicolor"]


Out[33]:
array(['7.0', '6.4', '6.9', '5.5', '6.5', '5.7', '6.3', '4.9', '6.6',
       '5.2', '5.0', '5.9', '6.0', '6.1', '5.6', '6.7', '5.6', '5.8',
       '6.2', '5.6', '5.9', '6.1', '6.3', '6.1', '6.4', '6.6', '6.8',
       '6.7', '6.0', '5.7', '5.5', '5.5', '5.8', '6.0', '5.4', '6.0',
       '6.7', '6.3', '5.6', '5.5', '5.5', '6.1', '5.8', '5.0', '5.6',
       '5.7', '5.7', '6.2', '5.1', '5.7', '7.0', '6.4', '6.9', '5.5',
       '6.5', '5.7', '6.3', '4.9', '6.6', '5.2', '5.0', '5.9', '6.0',
       '6.1', '5.6', '6.7', '5.6', '5.8', '6.2', '5.6', '5.9', '6.1',
       '6.3', '6.1', '6.4', '6.6', '6.8', '6.7', '6.0', '5.7', '5.5',
       '5.5', '5.8', '6.0', '5.4', '6.0', '6.7', '6.3', '5.6', '5.5',
       '5.5', '6.1', '5.8', '5.0', '5.6', '5.7', '5.7', '6.2', '5.1', '5.7'], 
      dtype='|S3')

In [34]:
import matplotlib.pyplot as mpl

In [45]:
# Scatter plot of petal length against sepal length, one colour per species.
#
# The columns were parsed from CSV text and are still arrays of strings, so
# they are cast to float here: current matplotlib versions treat string data
# as categorical labels, which would give unordered, non-numeric axes.
species_colormaps = (
    ("versicolor", mpl.cm.Blues),
    ("setosa", mpl.cm.Reds),
    ("virginica", mpl.cm.Greens),
)

for species_name, colormap in species_colormaps:
    selected = species == species_name  # boolean mask for this species
    mpl.scatter(sepal_length[selected].astype(float),
                petal_length[selected].astype(float),
                # colormap(0.6, 0.4): colour at position 0.6 with alpha 0.4
                s=150, facecolors=colormap(0.6, 0.4), label=species_name)

mpl.xlabel("Sepal Length", fontsize = "xx-large")
mpl.ylabel("Petal Length", fontsize = "xx-large")
mpl.legend(loc="upper left", frameon=False)


Out[45]:
<matplotlib.legend.Legend at 0x10673f990>