In [1]:
from IPython.display import HTML

# Markup for an inline frame pointing at the raw iris data file hosted by UCI.
iris_frame_markup = '<iframe src=http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data width=300 height=200></iframe>'
# Last expression of the cell: the HTML object is rendered as the cell output.
HTML(iris_frame_markup)
Out[1]:
The Iris dataset is famous in machine learning because prediction is easy. It is also built into scikit-learn.
In [3]:
# Import the load_iris function from scikit-learn's datasets module.
from sklearn.datasets import load_iris
# Call the loader and keep the returned "bunch" object, which contains the
# iris dataset and its attributes, under the name `iris`.
iris = load_iris()
# Last expression of the cell: display the type of the loaded object.
type(iris)
Out[3]:
In [4]:
# Print the iris data (the feature values stored in iris.data).
# print() is called as a function: the bare `print x` statement is a
# SyntaxError on Python 3, while this form also works on Python 2.
print(iris.data)
Machine learning terminology:
- Each row is an observation.
- Each column is a feature.
In [6]:
# Print the names of the four features.
# print() is called as a function so the cell runs on Python 3 as well as
# Python 2 (the bare print statement is Python 2 only).
print(iris.feature_names)
In [7]:
# Print the response values stored in iris.target.
# print() is called as a function so the cell runs on Python 3 as well as
# Python 2 (the bare print statement is Python 2 only).
print(iris.target)
In [8]:
# Print the target names (presumably the labels that the values in
# iris.target stand for; confirm against the printed output).
# print() is called as a function so the cell runs on Python 3 as well as
# Python 2 (the bare print statement is Python 2 only).
print(iris.target_names)
Each value we are predicting is the response (also called the target).
In [9]:
# Print the types of the feature data and of the response, one per line.
# print() is called as a function so the cell runs on Python 3 as well as
# Python 2 (the bare print statement is Python 2 only).
print(type(iris.data))
print(type(iris.target))
In [10]:
# Check the shape of the features
# (first dimension = number of observations, second dimension = number of features).
# print() is called as a function so the cell runs on Python 3 as well as
# Python 2 (the bare print statement is Python 2 only).
print(iris.data.shape)
In [11]:
# Check the shape of the response
# (a single dimension matching the number of observations).
# print() is called as a function so the cell runs on Python 3 as well as
# Python 2 (the bare print statement is Python 2 only).
print(iris.target.shape)
In [12]:
# Bind the feature matrix to "X" and the response vector to "y" in one step.
X, y = iris.data, iris.target