This notebook takes examples/examples.ipynb as a starting point.



In [1]:

    
# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Load the autoreload extension; mode 2 reloads imported modules before each
# cell executes, so edits to the library source are picked up without a restart
%load_ext autoreload
%autoreload 2



In [2]:

    
import os
import sys

sys.path.append("..")
sys.path.append("../..")

import numpy as np 
import pandas as pd
import yellowbrick as yb



In [3]:

    
from yellowbrick.features import (ParallelCoordinates,
                                  parallel_coordinates)

Dataset



In [4]:

    
from download import download_all 

## Directory holding the test data sets, resolved against the working directory
FIXTURES = os.path.join(os.getcwd(), "data")

## Each dataset lives at <FIXTURES>/<name>/<name>.csv
datasets = {
    name: os.path.join(FIXTURES, name, name + ".csv")
    for name in ("credit", "concrete", "occupancy", "mushroom")
}

def load_data(name, download=True):
    """
    Read one of the registered datasets into a pandas DataFrame.

    Parameters
    ----------
    name : str
        Key into the module-level ``datasets`` mapping; an unknown key
        raises ``KeyError`` from the lookup.
    download : bool, default True
        When the CSV file is missing on disk, call ``download_all()``
        before reading. When False, a missing file raises ``ValueError``.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_csv`` on the dataset's path.
    """
    csv_path = datasets[name]
    missing = not os.path.exists(csv_path)

    # Refuse early when the file is absent and fetching is not allowed
    if missing and not download:
        template = (
            "'{}' dataset has not been downloaded, "
            "use the download.py module to fetch datasets"
        )
        raise ValueError(template.format(name))

    # Fetch the fixtures only when the requested file is not already present
    if missing:
        download_all()

    return pd.read_csv(csv_path)



In [5]:

    
# Read the occupancy classification data, report its row count, preview it
data = load_data("occupancy")
print(data.shape[0])
data.head(5)









    



20560






    Out[5]:







  
    
      
      datetime
      temperature
      relative humidity
      light
      C02
      humidity
      occupancy
    
  
  
    
      0
      2015-02-04 17:51:00
      23.18
      27.2720
      426.0
      721.25
      0.004793
      1
    
    
      1
      2015-02-04 17:51:59
      23.15
      27.2675
      429.5
      714.00
      0.004783
      1
    
    
      2
      2015-02-04 17:53:00
      23.15
      27.2450
      426.0
      713.50
      0.004779
      1
    
    
      3
      2015-02-04 17:54:00
      23.15
      27.2000
      426.0
      708.25
      0.004772
      1
    
    
      4
      2015-02-04 17:55:00
      23.10
      27.2000
      426.0
      704.50
      0.004757
      1



In [6]:

    
# Feature columns to plot and the display names for the target classes
features = ["temperature", "relative humidity", "light", "C02", "humidity"]
classes = ["unoccupied", "occupied"]

# Keep the first 1000 rows as pandas objects (a DataFrame and a Series);
# call .values on them where plain numpy arrays are needed
X = data[features].head(1000)
y = data["occupancy"].head(1000)

Parallel Coordinates

Add DataFrame compatibility: the visualizer should accept pandas DataFrames as well as numpy arrays.



In [7]:

    
# numpy inputs: pass the underlying arrays and give the feature names explicitly
visualizer = ParallelCoordinates(features=features, classes=classes)
visualizer.fit_transform_poof(X.values, y.values);



In [8]:

    
# numpy inputs, no feature names (`features` is omitted; the target y is still passed)
visualizer = ParallelCoordinates(classes=classes)
visualizer.fit_transform_poof(X.values, y.values);



In [9]:

    
# dataframe inputs: pass the pandas DataFrame / Series directly, without `.values`
visualizer = ParallelCoordinates(classes=classes)
visualizer.fit_transform_poof(X, y);



In [10]:

    
# quick method: the functional `parallel_coordinates` helper, called with only X and y
parallel_coordinates(X, y);

`normalize` argument



In [11]:

    
# Draw the plot with the 'minmax' normalization option
visualizer = ParallelCoordinates(normalize='minmax', classes=classes)
visualizer.fit_transform_poof(X, y);



In [12]:

    
# Draw the plot with the 'maxabs' normalization option
visualizer = ParallelCoordinates(normalize='maxabs', classes=classes)
visualizer.fit_transform_poof(X, y);



In [13]:

    
# Draw the plot with the 'standard' normalization option
visualizer = ParallelCoordinates(normalize='standard', classes=classes)
visualizer.fit_transform_poof(X, y);



In [14]:

    
# Draw the plot with the 'l1' normalization option
visualizer = ParallelCoordinates(normalize='l1', classes=classes)
visualizer.fit_transform_poof(X, y);



In [15]:

    
# Draw the plot with the 'l2' normalization option
visualizer = ParallelCoordinates(normalize='l2', classes=classes)
visualizer.fit_transform_poof(X, y);



In [16]:

    
# NOTE(review): this cell repeats the 'l2' case from the previous cell verbatim;
# it adds no new coverage and could be dropped or pointed at another option.
visualizer = ParallelCoordinates(normalize='l2', classes=classes)
visualizer.fit_transform_poof(X, y);



In [17]:

    
# should raise YellowbrickValueError
# NOTE(review): the recorded run raises AttributeError instead -- __init__ builds
# the error message from self.normalize, which is not set on the instance at
# that point, so the intended YellowbrickValueError is never reached.
visualizer = ParallelCoordinates(normalize='bad', classes=classes)
visualizer.fit_transform_poof(X, y);









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-17-fcf498de708c> in <module>()
      1 # should raise YellowbrickValueError
----> 2 visualizer = ParallelCoordinates(normalize='bad', classes=classes)
      3 visualizer.fit_transform_poof(X, y);

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/pcoords.py in __init__(self, ax, features, classes, normalize, sample, color, colormap, vlines, vlines_kwds, **kwargs)
    205             raise YellowbrickValueError(
    206                 "'{}' is an unrecognized normalization method"
--> 207                 .format(self.normalize)
    208             )
    209         self.sample = sample

AttributeError: 'ParallelCoordinates' object has no attribute 'normalize'



In [18]:

    
# quick method, called with the `normalize` argument
parallel_coordinates(X, y, normalize='standard');

`sample` argument



In [19]:

    
# int `sample`: draw() keeps the first min(sample, len(X)) instances
visualizer = ParallelCoordinates(classes=classes, sample=200)
visualizer.fit_transform_poof(X, y);



In [20]:

    
# float `sample`: draw() keeps the first int(len(X) * sample) instances
visualizer = ParallelCoordinates(classes=classes, sample=0.2)
visualizer.fit_transform_poof(X, y);



In [21]:

    
# quick method, called with the `sample` argument
parallel_coordinates(X, y, sample=0.2);



In [22]:

    
# should raise YellowbrickTypeError
# (a str is neither int nor float; the check runs in draw(), so the error
# surfaces from fit_transform_poof rather than from the constructor)
visualizer = ParallelCoordinates(classes=classes, sample='bad')
visualizer.fit_transform_poof(X, y);









    



---------------------------------------------------------------------------
YellowbrickTypeError                      Traceback (most recent call last)
<ipython-input-22-fde984a70554> in <module>()
      1 # should raise YellowbrickTypeError
      2 visualizer = ParallelCoordinates(classes=classes, sample='bad')
----> 3 visualizer.fit_transform_poof(X, y);

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit_transform_poof(self, X, y, **kwargs)
     70         return the result of the transform method.
     71         """
---> 72         Xp = self.fit_transform(X, y, **kwargs)
     73         self.poof(**kwargs)
     74         return Xp

~/.virtualenvs/yellowbrick/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    495         else:
    496             # fit method of arity 2 (supervised transformation)
--> 497             return self.fit(X, y, **fit_params).transform(X)
    498 
    499 

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit(self, X, y, **kwargs)
    191 
    192         # Draw the instances
--> 193         self.draw(X, y, **kwargs)
    194 
    195         # Fit always returns self.

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/pcoords.py in draw(self, X, y, **kwargs)
    240         else:
    241             raise YellowbrickTypeError(
--> 242                 "`sample` parameter must be int or float"
    243             )
    244         X = X[:self.n_samples, :]

YellowbrickTypeError: `sample` parameter must be int or float



In [23]:

    
# should raise YellowbrickValueError
# (an int `sample` below 1 is rejected in draw(), i.e. when the visualizer is fit)
visualizer = ParallelCoordinates(classes=classes, sample=-1)
visualizer.fit_transform_poof(X, y);









    



---------------------------------------------------------------------------
YellowbrickValueError                     Traceback (most recent call last)
<ipython-input-23-a8a0ae171ee4> in <module>()
      1 # should raise YellowbrickValueError
      2 visualizer = ParallelCoordinates(classes=classes, sample=-1)
----> 3 visualizer.fit_transform_poof(X, y);

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit_transform_poof(self, X, y, **kwargs)
     70         return the result of the transform method.
     71         """
---> 72         Xp = self.fit_transform(X, y, **kwargs)
     73         self.poof(**kwargs)
     74         return Xp

~/.virtualenvs/yellowbrick/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    495         else:
    496             # fit method of arity 2 (supervised transformation)
--> 497             return self.fit(X, y, **fit_params).transform(X)
    498 
    499 

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit(self, X, y, **kwargs)
    191 
    192         # Draw the instances
--> 193         self.draw(X, y, **kwargs)
    194 
    195         # Fit always returns self.

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/pcoords.py in draw(self, X, y, **kwargs)
    229             if self.sample < 1:
    230                 raise YellowbrickValueError(
--> 231                     "`sample` parameter of type `int` must be greater than 1"
    232                 )
    233             self.n_samples = min([self.sample, len(X)])

YellowbrickValueError: `sample` parameter of type `int` must be greater than 1



In [24]:

    
# should raise YellowbrickValueError
# (a float `sample` must satisfy 0 < sample <= 1; draw() rejects 1.1 at fit time)
visualizer = ParallelCoordinates(classes=classes, sample=1.1)
visualizer.fit_transform_poof(X, y);









    



---------------------------------------------------------------------------
YellowbrickValueError                     Traceback (most recent call last)
<ipython-input-24-24bc492b8f98> in <module>()
      1 # should raise YellowbrickValueError
      2 visualizer = ParallelCoordinates(classes=classes, sample=1.1)
----> 3 visualizer.fit_transform_poof(X, y);

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit_transform_poof(self, X, y, **kwargs)
     70         return the result of the transform method.
     71         """
---> 72         Xp = self.fit_transform(X, y, **kwargs)
     73         self.poof(**kwargs)
     74         return Xp

~/.virtualenvs/yellowbrick/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    495         else:
    496             # fit method of arity 2 (supervised transformation)
--> 497             return self.fit(X, y, **fit_params).transform(X)
    498 
    499 

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/base.py in fit(self, X, y, **kwargs)
    191 
    192         # Draw the instances
--> 193         self.draw(X, y, **kwargs)
    194 
    195         # Fit always returns self.

~/Google Drive/projects/other/yellowbrick/yellowbrick/features/pcoords.py in draw(self, X, y, **kwargs)
    235             if self.sample <= 0 or self.sample > 1:
    236                 raise YellowbrickValueError(
--> 237                     "`sample` parameter of type `float` must be between 0 and 1"
    238                 )
    239             self.n_samples = int(len(X) * self.sample)

YellowbrickValueError: `sample` parameter of type `float` must be between 0 and 1



In [ ]:

	datetime	temperature	relative humidity	light	C02	humidity	occupancy
0	2015-02-04 17:51:00	23.18	27.2720	426.0	721.25	0.004793	1
1	2015-02-04 17:51:59	23.15	27.2675	429.5	714.00	0.004783	1
2	2015-02-04 17:53:00	23.15	27.2450	426.0	713.50	0.004779	1
3	2015-02-04 17:54:00	23.15	27.2000	426.0	708.25	0.004772	1
4	2015-02-04 17:55:00	23.10	27.2000	426.0	704.50	0.004757	1

Dataset

Parallel Coordinates

normalize argument

sample argument

`normalize` argument

`sample` argument