In [1]:
import pandas as pd
pd.set_option('precision', 2) # 2 decimal places
pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 30)
pd.set_option('display.width', 100) # wide windows



import sklearn
from sklearn.datasets import load_iris
iris = load_iris()

# Extract numpy arrays
X = iris.data 
y = iris.target


# Convert to pandas dataframe 
df = pd.DataFrame(data=X, columns=['sl', 'sw', 'pl', 'pw'])
# create column for labels
df['label'] = pd.Series(iris.target_names[y], dtype='category')

In [2]:
df


Out[2]:
sl sw pl pw label
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
5 5.4 3.9 1.7 0.4 setosa
6 4.6 3.4 1.4 0.3 setosa
7 5.0 3.4 1.5 0.2 setosa
8 4.4 2.9 1.4 0.2 setosa
9 4.9 3.1 1.5 0.1 setosa
... ... ... ... ... ...
140 6.7 3.1 5.6 2.4 virginica
141 6.9 3.1 5.1 2.3 virginica
142 5.8 2.7 5.1 1.9 virginica
143 6.8 3.2 5.9 2.3 virginica
144 6.7 3.3 5.7 2.5 virginica
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica

150 rows × 5 columns


In [3]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
# We made a cached copy since UCI repository is often down
#url = 'https://raw.githubusercontent.com/probml/pyprobml/master/data/mpg.csv'
#column_names = ['mpg','cylinders','displacement','horsepower','weight',
#                'acceleration', 'model_year', 'origin', 'name'] 
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
                'Acceleration', 'Year', 'Origin', 'Name']
df = pd.read_csv(url, names=column_names, sep='\s+', na_values="?")

# The last column (name) is a unique id for the car, so we drop it
df = df.drop(columns=['Name'])

In [4]:
df


Out[4]:
MPG Cylinders Displacement Horsepower Weight Acceleration Year Origin
0 18.0 8 307.0 130.0 3504.0 12.0 70 1
1 15.0 8 350.0 165.0 3693.0 11.5 70 1
2 18.0 8 318.0 150.0 3436.0 11.0 70 1
3 16.0 8 304.0 150.0 3433.0 12.0 70 1
4 17.0 8 302.0 140.0 3449.0 10.5 70 1
5 15.0 8 429.0 198.0 4341.0 10.0 70 1
6 14.0 8 454.0 220.0 4354.0 9.0 70 1
7 14.0 8 440.0 215.0 4312.0 8.5 70 1
8 14.0 8 455.0 225.0 4425.0 10.0 70 1
9 15.0 8 390.0 190.0 3850.0 8.5 70 1
... ... ... ... ... ... ... ... ...
388 26.0 4 156.0 92.0 2585.0 14.5 82 1
389 22.0 6 232.0 112.0 2835.0 14.7 82 1
390 32.0 4 144.0 96.0 2665.0 13.9 82 3
391 36.0 4 135.0 84.0 2370.0 13.0 82 1
392 27.0 4 151.0 90.0 2950.0 17.3 82 1
393 27.0 4 140.0 86.0 2790.0 15.6 82 1
394 44.0 4 97.0 52.0 2130.0 24.6 82 2
395 32.0 4 135.0 84.0 2295.0 11.6 82 1
396 28.0 4 120.0 79.0 2625.0 18.6 82 1
397 31.0 4 119.0 82.0 2720.0 19.4 82 1

398 rows × 8 columns


In [ ]: