Parallel plot Using plotly:


In [16]:
!pip install plotly --upgrade


Collecting plotly
  Downloading plotly-2.0.15.tar.gz (1.0MB)
    100% |████████████████████████████████| 1.0MB 516kB/s ta 0:00:011   15% |█████                           | 163kB 755kB/s eta 0:00:02
Collecting decorator>=4.0.6 (from plotly)
  Downloading decorator-4.1.2-py2.py3-none-any.whl
Collecting nbformat>=4.2 (from plotly)
  Downloading nbformat-4.4.0-py2.py3-none-any.whl (155kB)
    100% |████████████████████████████████| 163kB 729kB/s ta 0:00:01
Requirement already up-to-date: pytz in /home/ale/anaconda3/lib/python3.6/site-packages (from plotly)
Collecting requests (from plotly)
  Downloading requests-2.18.4-py2.py3-none-any.whl (88kB)
    100% |████████████████████████████████| 92kB 1.0MB/s ta 0:00:011
Requirement already up-to-date: six in /home/ale/anaconda3/lib/python3.6/site-packages (from plotly)
Requirement already up-to-date: ipython-genutils in /home/ale/anaconda3/lib/python3.6/site-packages (from nbformat>=4.2->plotly)
Requirement already up-to-date: traitlets>=4.1 in /home/ale/anaconda3/lib/python3.6/site-packages (from nbformat>=4.2->plotly)
Requirement already up-to-date: jupyter-core in /home/ale/anaconda3/lib/python3.6/site-packages (from nbformat>=4.2->plotly)
Requirement already up-to-date: jsonschema!=2.5.0,>=2.4 in /home/ale/anaconda3/lib/python3.6/site-packages (from nbformat>=4.2->plotly)
Collecting urllib3<1.23,>=1.21.1 (from requests->plotly)
  Downloading urllib3-1.22-py2.py3-none-any.whl (132kB)
    100% |████████████████████████████████| 133kB 815kB/s ta 0:00:01
Collecting idna<2.7,>=2.5 (from requests->plotly)
  Downloading idna-2.6-py2.py3-none-any.whl (56kB)
    100% |████████████████████████████████| 61kB 1.0MB/s ta 0:00:011
Collecting certifi>=2017.4.17 (from requests->plotly)
  Downloading certifi-2017.7.27.1-py2.py3-none-any.whl (349kB)
    100% |████████████████████████████████| 358kB 461kB/s ta 0:00:011
Collecting chardet<3.1.0,>=3.0.2 (from requests->plotly)
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133kB)
    100% |████████████████████████████████| 143kB 914kB/s ta 0:00:01
Building wheels for collected packages: plotly
  Running setup.py bdist_wheel for plotly ... done
  Stored in directory: /home/ale/.cache/pip/wheels/c9/c4/00/a80b040dd8c9301d29f7153881c96edf1cd8561977ec440941
Successfully built plotly
Installing collected packages: decorator, nbformat, urllib3, idna, certifi, chardet, requests, plotly
  Found existing installation: decorator 4.0.11
    Uninstalling decorator-4.0.11:
      Successfully uninstalled decorator-4.0.11
  Found existing installation: nbformat 4.3.0
    DEPRECATION: Uninstalling a distutils installed project (nbformat) has been deprecated and will be removed in a future version. This is due to the fact that uninstalling a distutils project will only partially uninstall the project.
    Uninstalling nbformat-4.3.0:
      Successfully uninstalled nbformat-4.3.0
  Found existing installation: idna 2.5
    Uninstalling idna-2.5:
      Successfully uninstalled idna-2.5
  Found existing installation: chardet 3.0.3
    Uninstalling chardet-3.0.3:
      Successfully uninstalled chardet-3.0.3
  Found existing installation: requests 2.14.2
    Uninstalling requests-2.14.2:
      Successfully uninstalled requests-2.14.2
  Found existing installation: plotly 2.0.12
    Uninstalling plotly-2.0.12:
      Successfully uninstalled plotly-2.0.12
Successfully installed certifi-2017.7.27.1 chardet-3.0.4 decorator-4.1.2 idna-2.6 nbformat-4.4.0 plotly-2.0.15 requests-2.18.4 urllib3-1.22

In [1]:
from plotly import __version__
print(__version__)
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
init_notebook_mode(connected=True)
from plotly.graph_objs import *

import pandas as pd
import seaborn as sns


2.0.15

In [2]:
df = sns.load_dataset("iris")

data = [
    Parcoords(
        line=dict(
            color=df['species'],
            colorscale=[[0, 'red'], [0.5, 'green'], [1, 'blue']]),
        dimensions=list([
            dict(
                range=[2, 4.5], label='Sepal Width', values=df['sepal_width']),
            dict(
                range=[4, 8],
                # constraintrange=[5, 6],
                label='Sepal Length',
                values=df['sepal_length']),
            dict(
                range=[0, 2.5], label='Petal Width', values=df['petal_width']),
            dict(
                range=[1, 7], label='Petal Length', values=df['petal_length'])
        ]))
]

iplot(data, filename='parcoords-basic')



In [3]:
import pandas as pd 
df = pd.read_csv("https://raw.githubusercontent.com/bcdunbar/datasets/master/parcoords_data.csv")

data = [
    Parcoords(
       line = dict(
                  color = df['colorVal'],
                  colorscale = 'Jet',
                  showscale = True,
                  reversescale = True,
                  cmin = -4000,
                  cmax = -100
                  ),
        dimensions = list([
            dict(range = [32000,227900],
                 constraintrange = [100000,150000],
                 label = 'Block Height', values = df['blockHeight']),
            dict(range = [0,700000],
                 label = 'Block Width', values = df['blockWidth']),
            dict(tickvals = [0,0.5,1,2,3],
                 ticktext = ['A','AB','B','Y','Z'],
                 label = 'Cyclinder Material', values = df['cycMaterial']),
            dict(range = [-1,4],
                 tickvals = [0,1,2,3],
                 label = 'Block Material', values = df['blockMaterial']),
            dict(range = [134,3154],
                 visible = True,
                 label = 'Total Weight', values = df['totalWeight']),
            dict(range = [9,19984],
                 label = 'Assembly Penalty Weight', values = df['assemblyPW']),
            dict(range = [49000,568000],
                 label = 'Height st Width', values = df['HstW']),
            dict(range = [-28000,196430],
                 label = 'Min Height Width', values = df['minHW']),
            dict(range = [98453,501789],
                 label = 'Min Width Diameter', values = df['minWD']),
            dict(range = [1417,107154],
                 label = 'RF Block', values = df['rfBlock'])
        ])
    )
]

#plot(data)
iplot(data, filename = 'parcoords-advanced.html') ## this seems to don't work anymore ... the html is blank



In [4]:
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly.graph_objs import *
init_notebook_mode()


def parallel_coordinates_plot(df,
                              use_cols,
                              color_col,
                              create_html='',
                              line={},
                              dimensions=[]):
    """
    Produce a parallel plot using the selected columns from the dataframe df. 
    It use offline plotly functions.
    - df: pandas DataFrame
    - use_cols: column to be used from df
    - color_col: dataframe column with integers that specify the color of the line for that observation
    - create_html: when different from '' this is the name of the html to be created with the parallel plot
    - line: update the line option of Parcoords (check https://plot.ly/python/reference/#parcoords)
    - dimensions: update the dimensions option of Parcoords (check https://plot.ly/python/reference/#parcoords)
    """

    myline = dict(color=df[color_col], showscale=True)
    if line != {}:
        myline.update()
    mydimensions = [
        dict(label=v, values=df[v])  # range=[df[v].min(), df[v].max()], 
        for v in use_cols
    ]
    if dimensions != []: mydimensions.extend(dimensions)

    data = [Parcoords(line=myline, dimensions=list(mydimensions))]
    iplot(data)
    if (create_html != ''):
        plot(data, filename=create_html)


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

In [5]:
df.columns


Out[5]:
Index(['colorVal', 'blockHeight', 'blockWidth', 'cycMaterial', 'blockMaterial',
       'totalWeight', 'assemblyPW', 'HstW', 'minHW', 'minWD', 'rfBlock'],
      dtype='object')

In [6]:
parallel_coordinates_plot(df, [
    'blockHeight', 'blockWidth', 'cycMaterial', 'blockMaterial', 'totalWeight', 'assemblyPW', 'rfBlock'
], 'colorVal', create_html='prova_CANCELLAMI.html')


Parallel Plot integrated in pandas, it produces really bad plot, no iterations are possible


In [7]:
import pandas
import matplotlib.pyplot as plt
%matplotlib inline
from pandas.tools.plotting import parallel_coordinates

In [9]:
data = sns.load_dataset("iris")

In [10]:
data.head()


Out[10]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa

In [11]:
plt.figure(figsize=(20,10))
parallel_coordinates(data, 'species', cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'])
plt.show()


/home/ale/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: FutureWarning:

'pandas.tools.plotting.parallel_coordinates' is deprecated, import 'pandas.plotting.parallel_coordinates' instead.


In [ ]:


In [ ]:


In [ ]:


In [ ]: