In [1]:
from yellowbrick.datasets import load_occupancy
from yellowbrick.features import ParallelCoordinates

# Load the classification data set
X, y = load_occupancy(return_dataset=True).to_pandas()

In [2]:
# Specify the features of interest and the classes of the target
features = [
    "temperature", "relative humidity", "light", "CO2", "humidity"
]
classes = ["unoccupied", "occupied"]

In [3]:
X = X[features]

In [4]:
# Instantiate the visualizer
visualizer = ParallelCoordinates(
    classes=classes, features=features, sample=0.05, shuffle=True,
    color=["#4C72B0", "#C44E52"], size=(700,200)
)

# Fit and transform the data to the visualizer
visualizer.fit_transform(X, y)

# Finalize the title and axes then display the visualization
visualizer.poof()



In [5]:
# Instantiate the visualizer
visualizer = ParallelCoordinates(
    classes=classes, features=features, normalize='standard',
    sample=0.05, shuffle=True, color=["#4C72B0", "#C44E52"],
    size=(700,200)
)

# Fit and transform the data to the visualizer
visualizer.fit_transform(X, y)

# Finalize the title and axes then display the visualization
visualizer.poof()



In [6]:
# Instantiate the visualizer
visualizer = ParallelCoordinates(
    classes=classes, features=features, sample=0.05, shuffle=True,
    normalize='standard', fast=True, color=["#4C72B0", "#C44E52"],
    size=(700,200)
)

# Fit and transform the data to the visualizer
visualizer.fit_transform(X, y)

# Finalize the title and axes then display the visualization
visualizer.poof()



In [7]:
import time
import matplotlib.pyplot as plt
from yellowbrick.features import ParallelCoordinates
import pandas as pd
import numpy as np


def plot_speedup(trials=5, factors=np.arange(1, 11)):

    def pcoords_time(X, y, fast=True):
        _, ax = plt.subplots()
        oz = ParallelCoordinates(fast=fast, ax=ax)

        start = time.time()
        oz.fit_transform(X, y)
        delta = time.time() - start

        plt.cla()        # clear current axis
        plt.clf()        # clear current figure
        plt.close("all") # close all existing plots

        return delta

    def pcoords_speedup(X, y):
        fast_time = pcoords_time(X, y, fast=True)
        slow_time = pcoords_time(X, y, fast=False)

        return slow_time / fast_time

    X,y = load_occupancy(return_dataset=True).to_numpy()

    speedups = []
    variance = []

    for factor in factors:
        X = np.repeat(X, factor, axis=0)
        y = np.repeat(y, factor, axis=0)

        local_speedups = []
        for trial in range(trials):
            local_speedups.append(pcoords_speedup(X, y))

        local_speedups = np.array(local_speedups)
        speedups.append(local_speedups.mean())
        variance.append(local_speedups.std())

    speedups = np.array(speedups)
    variance = np.array(variance)

    series = pd.Series(speedups, index=factors)
    _, ax = plt.subplots(figsize=(9,6))
    series.plot(ax=ax, marker='o', label="speedup factor", color="#C44E52")

    # Plot one standard deviation above and below the mean
    ax.fill_between(
        factors, speedups - variance, speedups + variance, alpha=0.25,
        color="#C44E52",
    )

    ax.set_ylabel("Speedup")
    ax.set_xlabel("Data size (# repeats)")
    ax.set_title("Speed Improvement of Fast Parallel Coordinates")
    plt.show()

In [ ]:
plot_speedup()


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-8-2a5bc3a0e6fc> in <module>()
----> 1 plot_speedup()

<ipython-input-7-9e78ac9d2f7a> in plot_speedup(trials, factors)
     39         local_speedups = []
     40         for trial in range(trials):
---> 41             local_speedups.append(pcoords_speedup(X, y))
     42 
     43         local_speedups = np.array(local_speedups)

<ipython-input-7-9e78ac9d2f7a> in pcoords_speedup(X, y)
     24     def pcoords_speedup(X, y):
     25         fast_time = pcoords_time(X, y, fast=True)
---> 26         slow_time = pcoords_time(X, y, fast=False)
     27 
     28         return slow_time / fast_time

<ipython-input-7-9e78ac9d2f7a> in pcoords_time(X, y, fast)
     13 
     14         start = time.time()
---> 15         oz.fit_transform(X, y)
     16         delta = time.time() - start
     17 

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    554         else:
    555             # fit method of arity 2 (supervised transformation)
--> 556             return self.fit(X, y, **fit_params).transform(X)
    557 
    558 

~/Desktop/eudicot/acorn/my_yb/yellowbrick/features/pcoords.py in fit(self, X, y, **kwargs)
    373 
    374         # the super method calls draw and returns self
--> 375         return super(ParallelCoordinates, self).fit(X, y, **kwargs)
    376 
    377     def draw(self, X, y, **kwargs):

~/Desktop/eudicot/acorn/my_yb/yellowbrick/features/base.py in fit(self, X, y, **kwargs)
    278 
    279         # Draw the instances
--> 280         self.draw(X, y, **kwargs)
    281 
    282         # Fit always returns self.

~/Desktop/eudicot/acorn/my_yb/yellowbrick/features/pcoords.py in draw(self, X, y, **kwargs)
    394         if self.fast:
    395             return self.draw_classes(X, y, **kwargs)
--> 396         return self.draw_instances(X, y, **kwargs)
    397 
    398     def draw_instances(self, X, y, **kwargs):

~/Desktop/eudicot/acorn/my_yb/yellowbrick/features/pcoords.py in draw_instances(self, X, y, **kwargs)
    435             self.ax.plot(
    436                 self._increments, Xi,
--> 437                 color=self._colors[label], alpha=alpha, **kwargs
    438             )
    439 

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/axes/_axes.py in plot(self, scalex, scaley, data, *args, **kwargs)
   1667         for line in lines:
   1668             self.add_line(line)
-> 1669         self.autoscale_view(scalex=scalex, scaley=scaley)
   1670         return lines
   1671 

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/axes/_base.py in autoscale_view(self, tight, scalex, scaley)
   2411                 (self._xmargin and scalex and self._autoscaleXon) or
   2412                 (self._ymargin and scaley and self._autoscaleYon)):
-> 2413             stickies = [artist.sticky_edges for artist in self.get_children()]
   2414             x_stickies = np.array([x for sticky in stickies for x in sticky.x])
   2415             y_stickies = np.array([y for sticky in stickies for y in sticky.y])

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/axes/_base.py in <listcomp>(.0)
   2411                 (self._xmargin and scalex and self._autoscaleXon) or
   2412                 (self._ymargin and scaley and self._autoscaleYon)):
-> 2413             stickies = [artist.sticky_edges for artist in self.get_children()]
   2414             x_stickies = np.array([x for sticky in stickies for x in sticky.x])
   2415             y_stickies = np.array([y for sticky in stickies for y in sticky.y])

KeyboardInterrupt: 

In [ ]: