This notebook takes examples/examples.ipynb as its starting point.


In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

sys.path.append("..")
sys.path.append("../..")

import numpy as np 
import pandas as pd
import yellowbrick as yb

In [3]:
from yellowbrick.features import (ParallelCoordinates,
                                  parallel_coordinates)

Dataset


In [4]:
from download import download_all 

## Directory (under the current working directory) holding the test data sets
FIXTURES = os.path.join(os.getcwd(), "data")

## Dataset name -> CSV path; each set lives at <FIXTURES>/<name>/<name>.csv
datasets = {
    name: os.path.join(FIXTURES, name, name + ".csv")
    for name in ("credit", "concrete", "occupancy", "mushroom")
}

def load_data(name, download=True):
    """
    Read the named dataset from disk and return it as a pandas data frame.

    Parameters
    ----------
    name : str
        Key into the module-level ``datasets`` mapping; an unknown key
        raises ``KeyError`` from the dictionary lookup.
    download : bool, default True
        When the CSV file is not on disk, call ``download_all()`` before
        reading (presumably this fetches the fixture files -- see download.py).
        When false, a missing file raises instead.

    Returns
    -------
    The result of ``pd.read_csv`` on the dataset's CSV path.

    Raises
    ------
    ValueError
        If the file is missing and ``download`` is false.
    """
    csv_path = datasets[name]

    # Only act when the file is absent: either refuse or fetch it
    missing = not os.path.exists(csv_path)
    if missing and not download:
        message = (
            "'{}' dataset has not been downloaded, "
            "use the download.py module to fetch datasets"
        )
        raise ValueError(message.format(name))
    if missing:
        download_all()

    return pd.read_csv(csv_path)

In [5]:
# Load the classification data set
data = load_data('occupancy') 
print(len(data))
data.head()


20560
Out[5]:
datetime temperature relative humidity light C02 humidity occupancy
0 2015-02-04 17:51:00 23.18 27.2720 426.0 721.25 0.004793 1
1 2015-02-04 17:51:59 23.15 27.2675 429.5 714.00 0.004783 1
2 2015-02-04 17:53:00 23.15 27.2450 426.0 713.50 0.004779 1
3 2015-02-04 17:54:00 23.15 27.2000 426.0 708.25 0.004772 1
4 2015-02-04 17:55:00 23.10 27.2000 426.0 704.50 0.004757 1

In [6]:
# Specify the features of interest and the classes of the target
features = ["temperature", "relative humidity", "light", "C02", "humidity"]
classes = ['unoccupied', 'occupied']

# Take the first 1000 rows. X and y stay pandas objects here; the cells below
# call .values on them when numpy input is wanted
X = data.head(1000)[features]
y = data.head(1000).occupancy

Parallel Coordinates

  • add dataframe compatibility

In [7]:
# numpy inputs
visualizer = ParallelCoordinates(features=features, classes=classes)
visualizer.fit_transform_poof(X.values, y.values);



In [8]:
# numpy inputs, without passing the `features` names (y is still supplied)
visualizer = ParallelCoordinates(classes=classes)
visualizer.fit_transform_poof(X.values, y.values);



In [9]:
# dataframe inputs
visualizer = ParallelCoordinates(classes=classes)
visualizer.fit_transform_poof(X, y);



In [10]:
# quick method
parallel_coordinates(X, y);


normalize argument


In [11]:
visualizer = ParallelCoordinates(normalize='minmax', classes=classes)
visualizer.fit_transform_poof(X, y);



In [12]:
visualizer = ParallelCoordinates(normalize='maxabs', classes=classes)
visualizer.fit_transform_poof(X, y);



In [13]:
visualizer = ParallelCoordinates(normalize='standard', classes=classes)
visualizer.fit_transform_poof(X, y);



In [14]:
visualizer = ParallelCoordinates(normalize='l1', classes=classes)
visualizer.fit_transform_poof(X, y);



In [15]:
visualizer = ParallelCoordinates(normalize='l2', classes=classes)
visualizer.fit_transform_poof(X, y);



In [16]:
# NOTE(review): this cell repeats the normalize='l2' call from the cell above;
# confirm whether a different normalizer was intended here
visualizer = ParallelCoordinates(normalize='l2', classes=classes)
visualizer.fit_transform_poof(X, y);



In [17]:
# should raise YellowbrickValueError
# NOTE(review): the recorded run raises AttributeError instead -- per the
# traceback below, __init__ in pcoords.py builds the error message from
# self.normalize, which is not yet set on the instance at that point
visualizer = ParallelCoordinates(normalize='bad', classes=classes)
visualizer.fit_transform_poof(X, y);


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-17-fcf498de708c> in <module>()
      1 # should raise YellowbrickValueError
----> 2 visualizer = ParallelCoordinates(normalize='bad', classes=classes)
      3 visualizer.fit_transform_poof(X, y);

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/pcoords.py in __init__(self, ax, features, classes, normalize, sample, color, colormap, vlines, vlines_kwds, **kwargs)
    205             raise YellowbrickValueError(
    206                 "'{}' is an unrecognized normalization method"
--> 207                 .format(self.normalize)
    208             )
    209         self.sample = sample

AttributeError: 'ParallelCoordinates' object has no attribute 'normalize'

In [18]:
# quick method
parallel_coordinates(X, y, normalize='standard');


sample argument


In [19]:
visualizer = ParallelCoordinates(classes=classes, sample=200)
visualizer.fit_transform_poof(X, y);



In [20]:
visualizer = ParallelCoordinates(classes=classes, sample=0.2)
visualizer.fit_transform_poof(X, y);



In [21]:
# quick method
parallel_coordinates(X, y, sample=0.2);



In [22]:
# should raise YellowbrickTypeError
# (per the recorded traceback the error comes from draw() during
# fit_transform_poof, not from the constructor call)
visualizer = ParallelCoordinates(classes=classes, sample='bad')
visualizer.fit_transform_poof(X, y);


---------------------------------------------------------------------------
YellowbrickTypeError                      Traceback (most recent call last)
<ipython-input-22-fde984a70554> in <module>()
      1 # should raise YellowbrickTypeError
      2 visualizer = ParallelCoordinates(classes=classes, sample='bad')
----> 3 visualizer.fit_transform_poof(X, y);

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit_transform_poof(self, X, y, **kwargs)
     70         return the result of the transform method.
     71         """
---> 72         Xp = self.fit_transform(X, y, **kwargs)
     73         self.poof(**kwargs)
     74         return Xp

~/.virtualenvs/yellowbrick/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    495         else:
    496             # fit method of arity 2 (supervised transformation)
--> 497             return self.fit(X, y, **fit_params).transform(X)
    498 
    499 

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit(self, X, y, **kwargs)
    191 
    192         # Draw the instances
--> 193         self.draw(X, y, **kwargs)
    194 
    195         # Fit always returns self.

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/pcoords.py in draw(self, X, y, **kwargs)
    240         else:
    241             raise YellowbrickTypeError(
--> 242                 "`sample` parameter must be int or float"
    243             )
    244         X = X[:self.n_samples, :]

YellowbrickTypeError: `sample` parameter must be int or float

In [23]:
# should raise YellowbrickValueError
# NOTE(review): the guard shown in the traceback is `self.sample < 1`, while
# the message reads "must be greater than 1" -- confirm which is intended for
# sample=1
visualizer = ParallelCoordinates(classes=classes, sample=-1)
visualizer.fit_transform_poof(X, y);


---------------------------------------------------------------------------
YellowbrickValueError                     Traceback (most recent call last)
<ipython-input-23-a8a0ae171ee4> in <module>()
      1 # should raise YellowbrickValueError
      2 visualizer = ParallelCoordinates(classes=classes, sample=-1)
----> 3 visualizer.fit_transform_poof(X, y);

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit_transform_poof(self, X, y, **kwargs)
     70         return the result of the transform method.
     71         """
---> 72         Xp = self.fit_transform(X, y, **kwargs)
     73         self.poof(**kwargs)
     74         return Xp

~/.virtualenvs/yellowbrick/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    495         else:
    496             # fit method of arity 2 (supervised transformation)
--> 497             return self.fit(X, y, **fit_params).transform(X)
    498 
    499 

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit(self, X, y, **kwargs)
    191 
    192         # Draw the instances
--> 193         self.draw(X, y, **kwargs)
    194 
    195         # Fit always returns self.

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/pcoords.py in draw(self, X, y, **kwargs)
    229             if self.sample < 1:
    230                 raise YellowbrickValueError(
--> 231                     "`sample` parameter of type `int` must be greater than 1"
    232                 )
    233             self.n_samples = min([self.sample, len(X)])

YellowbrickValueError: `sample` parameter of type `int` must be greater than 1

In [24]:
# should raise YellowbrickValueError
visualizer = ParallelCoordinates(classes=classes, sample=1.1)
visualizer.fit_transform_poof(X, y);


---------------------------------------------------------------------------
YellowbrickValueError                     Traceback (most recent call last)
<ipython-input-24-24bc492b8f98> in <module>()
      1 # should raise YellowbrickValueError
      2 visualizer = ParallelCoordinates(classes=classes, sample=1.1)
----> 3 visualizer.fit_transform_poof(X, y);

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit_transform_poof(self, X, y, **kwargs)
     70         return the result of the transform method.
     71         """
---> 72         Xp = self.fit_transform(X, y, **kwargs)
     73         self.poof(**kwargs)
     74         return Xp

~/.virtualenvs/yellowbrick/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    495         else:
    496             # fit method of arity 2 (supervised transformation)
--> 497             return self.fit(X, y, **fit_params).transform(X)
    498 
    499 

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit(self, X, y, **kwargs)
    191 
    192         # Draw the instances
--> 193         self.draw(X, y, **kwargs)
    194 
    195         # Fit always returns self.

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/pcoords.py in draw(self, X, y, **kwargs)
    235             if self.sample <= 0 or self.sample > 1:
    236                 raise YellowbrickValueError(
--> 237                     "`sample` parameter of type `float` must be between 0 and 1"
    238                 )
    239             self.n_samples = int(len(X) * self.sample)

YellowbrickValueError: `sample` parameter of type `float` must be between 0 and 1

In [ ]: