In [1]:
import pandas as pd
pd.set_option('precision', 2) # 2 decimal places
pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 30)
pd.set_option('display.width', 100) # wide windows
import sklearn
from sklearn.datasets import load_iris
iris = load_iris()
# Extract numpy arrays
X = iris.data
y = iris.target
# Convert to pandas dataframe
df = pd.DataFrame(data=X, columns=['sl', 'sw', 'pl', 'pw'])
# create column for labels
df['label'] = pd.Series(iris.target_names[y], dtype='category')
In [2]:
df
Out[2]:
In [3]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
# We made a cached copy since UCI repository is often down
#url = 'https://raw.githubusercontent.com/probml/pyprobml/master/data/mpg.csv'
#column_names = ['mpg','cylinders','displacement','horsepower','weight',
# 'acceleration', 'model_year', 'origin', 'name']
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
'Acceleration', 'Year', 'Origin', 'Name']
df = pd.read_csv(url, names=column_names, sep='\s+', na_values="?")
# The last column (name) is a unique id for the car, so we drop it
df = df.drop(columns=['Name'])
In [4]:
df
Out[4]:
In [ ]: