Advanced Visualization

This notebook tours several visualization libraries: it starts with Altair, runs through matplotlib and seaborn, moves on to bqplot, and ends with ipyvolume.

Altair



In [ ]:



In [ ]:



In [2]:

    
# %load ../scripts/altair-example.py
#!/usr/bin/env python
"""
Altair Tooltip Example
======================

An example script that Jonathan lightly modified to make it work in a Notebook.
"""
import sys

import altair as alt
from IPython.display import display
from vega_datasets import data

# Select the Altair renderer used for chart output in this notebook.
alt.renderers.enable("notebook")
# alt.renderers.enable('default')

# --- Iris: petal length vs. petal width, colored by species ---------------
iris = data.iris()

# Only the last expression of a cell is rendered automatically, so a chart
# built in the middle of the cell must be displayed explicitly or it is lost.
display(
    alt.Chart(iris).mark_point().encode(x="petalLength", y="petalWidth", color="species")
)

# --- Cars: interactive scatter with tooltips -------------------------------
cars = data.cars()

LITERS_PER_GALLON = 3.78541

# (miles / gallon) / (liters / gallon) = miles / liter.
# Multiplying here would yield "mile-liters per gallon squared", not a fuel economy.
cars["Miles_per_Liter"] = cars["Miles_per_Gallon"] / LITERS_PER_GALLON

alt.Chart(cars).mark_circle(size=60).encode(
    x="Horsepower",
    y="Miles_per_Gallon",
    color="Origin",
    tooltip=["Name", "Origin", "Horsepower", "Miles_per_Gallon"],
).interactive()



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [3]:

    
%matplotlib inline
import matplotlib.pyplot as plt

import seaborn as sns
sns.set_context('poster')
sns.set_style('whitegrid') 
# sns.set_style('darkgrid') 
plt.rcParams['figure.figsize'] = 12, 8  # plotsize 

import numpy as np
import pandas as pd
from pandas.plotting import scatter_matrix
from sklearn.datasets import load_boston

import warnings
warnings.filterwarnings('ignore')

import matplotlib as mpl

Note

This section uses the cleaned coal-production data produced by the Data Cleaning notebook; see that notebook for details on how it was prepared.



In [4]:

    
# Load the cleaned coal production table; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/coal_prod_cleaned.csv")



In [5]:

    
# Preview the first rows of the coal production table.
df.head()









    Out[5]:







  
    
      
      MSHA_ID
      Average_Employees
      Company_Type
      Labor_Hours
      Mine_Basin
      Mine_County
      Mine_Name
      Mine_State
      Mine_Status
      Mine_Type
      Operating_Company
      Operating_Company_Address
      Operation_Type
      Production_short_tons
      Union_Code
      Year
    
  
  
    
      0
      103295
      18.0
      Independent Producer Operator
      39175.0
      Appalachia Southern
      Bibb
      Seymour Mine
      Alabama
      Active
      Surface
      Hope Coal Company Inc
      P.O. Box 249, Maylene, AL 35114
      Mine only
      105082.0
      NaN
      2008
    
    
      1
      103117
      19.0
      Operating Subsidiary
      29926.0
      Appalachia Southern
      Cullman
      Mine #2, #3, #4
      Alabama
      Active, men working, not producing
      Surface
      Twin Pines Coal Company Inc
      1874 County Road 15, Bremen, AL 35033
      Mine only
      10419.0
      NaN
      2008
    
    
      2
      103361
      20.0
      Operating Subsidiary
      42542.0
      Appalachia Southern
      Cullman
      Cold Springs West Mine
      Alabama
      Active
      Surface
      Twin Pines Coal Company
      74 Industrial Parkway, Jasper, AL 35502
      Mine only
      143208.0
      NaN
      2008
    
    
      3
      100759
      395.0
      Operating Subsidiary
      890710.0
      Appalachia Southern
      Fayette
      North River # 1 Underground Mi
      Alabama
      Active
      Underground
      Chevron Mining Inc
      3114 County Road 63 S, Berry, AL 35546
      Mine and Preparation Plant
      2923261.0
      United Mine Workers of America
      2008
    
    
      4
      103246
      22.0
      Independent Producer Operator
      55403.0
      Appalachia Southern
      Franklin
      Bear Creek
      Alabama
      Active
      Surface
      Birmingham Coal & Coke Co., In
      912 Edenton Street, Birmingham, AL 35242
      Mine only
      183137.0
      NaN
      2008



In [6]:

    
# Ask seaborn for one palette entry per distinct year in the data.
n_years = df["Year"].nunique()
colors = sns.color_palette(n_colors=n_years)



In [7]:

    
# Pair each year, in ascending order, with its palette color.
years_ascending = sorted(df["Year"].unique())
color_dict = {year: rgb for year, rgb in zip(years_ascending, colors)}
color_dict









    Out[7]:





{2008: (0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
 2009: (1.0, 0.4980392156862745, 0.054901960784313725),
 2010: (0.17254901960784313, 0.6274509803921569, 0.17254901960784313),
 2011: (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),
 2012: (0.5803921568627451, 0.403921568627451, 0.7411764705882353)}



In [8]:

    
# Scatter production against labor hours for the first, third and last year,
# one color per year, and save the figure one directory above the notebook.

# Sort before picking positions so the selection is chronological and does not
# depend on the row order of the CSV (color_dict is keyed on the sorted years too).
years_sorted = sorted(df.Year.unique())
selected_years = [years_sorted[i] for i in (0, 2, -1)]

for year in selected_years:
    year_rows = df[df.Year == year]  # filter once, reuse for both axes
    plt.scatter(year_rows.Labor_Hours,
                year_rows.Production_short_tons,
                color=color_dict[year],  # a single RGB tuple applied to every point
                s=500,
                label=year,
               )
plt.xlabel("Total Hours Worked")
plt.ylabel("Total Amount Produced")
plt.legend()
plt.savefig("../ex1.png")



In [9]:

    
# List the style sheet names matplotlib can load.
plt.style.available









    Out[9]:





['seaborn-dark',
 'seaborn-darkgrid',
 'seaborn-ticks',
 'fivethirtyeight',
 'seaborn-whitegrid',
 'classic',
 '_classic_test',
 'fast',
 'seaborn-talk',
 'seaborn-dark-palette',
 'seaborn-bright',
 'seaborn-pastel',
 'grayscale',
 'seaborn-notebook',
 'ggplot',
 'seaborn-colorblind',
 'seaborn-muted',
 'seaborn',
 'Solarize_Light2',
 'seaborn-paper',
 'bmh',
 'tableau-colorblind10',
 'seaborn-white',
 'dark_background',
 'seaborn-poster',
 'seaborn-deep']



In [10]:

    
# Apply the 'seaborn-colorblind' style sheet to the plots that follow.
# NOTE(review): newer matplotlib releases renamed the seaborn-* styles
# (e.g. "seaborn-v0_8-colorblind") — confirm the name against the installed version.
mpl.style.use('seaborn-colorblind')



In [ ]:



In [11]:

    
# Scatter production against labor hours for three of the years. No explicit
# color is passed, so the colors come from matplotlib's current defaults.
for year in sorted(df.Year.unique()[[0, 2, -1]]):
    year_rows = df[df.Year == year]
    plt.scatter(
        year_rows.Labor_Hours,
        year_rows.Production_short_tons,
        s=50,
        label=year,
    )
plt.xlabel("Total Hours Worked")
plt.ylabel("Total Amount Produced")
plt.legend();



In [12]:

    
# Assemble the Boston housing data into one frame: the feature columns plus
# the MEDV target. NOTE: this rebinds `df`, which held the coal data until now.
df_dict = load_boston()
features = pd.DataFrame(df_dict.data, columns=df_dict.feature_names)
target = pd.DataFrame({'MEDV': df_dict.target})
df = pd.concat([features, target], axis="columns")



In [13]:

    
# Add a categorical copy of ZN as a new 'Zone' column, then preview the frame.
df = df.assign(Zone=df['ZN'].astype('category'))
df.head()



In [14]:

    
# Distribution of the target variable (MEDV): density curve with rug marks, no histogram.
fig, ax = plt.subplots(figsize=(6, 4))
sns.distplot(df['MEDV'], hist=False, rug=True, ax=ax);



In [15]:

    
# Bivariate KDE of LSTAT and MEDV on a 10x7 figure.
fig, ax = plt.subplots(figsize=(10, 7))
sns.kdeplot(df['LSTAT'], df['MEDV'], ax=ax);



In [16]:

    
# Pairwise scatter plots, with histograms on the diagonal, for the target and five features.
matrix_columns = ['MEDV', 'LSTAT', 'CRIM', 'RM', 'NOX', 'DIS']
fig, ax = plt.subplots(figsize=(10, 10))
scatter_matrix(df[matrix_columns], alpha=0.2, diagonal='hist', ax=ax);



In [17]:

    
# Seaborn pair plot of the target and five features, drawn with small,
# half-transparent markers.
pair_vars = ['MEDV', 'LSTAT', 'CRIM', 'RM', 'NOX', 'DIS']
sns.pairplot(data=df, vars=pair_vars, plot_kws=dict(s=20, alpha=0.5));

Quantile cuts



In [18]:

    
# Load the raw player table from a gzip-compressed CSV.
players = pd.read_csv("../data/raw_players.csv.gz", compression='gzip')



In [19]:

    
# Preview the first rows of the player table.
players.head()









    Out[19]:







  
    
      
      playerShort
      birthday
      height
      weight
      position
      photoID
      rater1
      rater2
    
  
  
    
      0
      aaron-hughes
      08.11.1979
      182.0
      71.0
      Center Back
      3868.jpg
      0.25
      0.00
    
    
      1
      aaron-hunt
      04.09.1986
      183.0
      73.0
      Attacking Midfielder
      20136.jpg
      0.00
      0.25
    
    
      2
      aaron-lennon
      16.04.1987
      165.0
      63.0
      Right Midfielder
      13515.jpg
      0.25
      0.25
    
    
      3
      aaron-ramsey
      26.12.1990
      178.0
      76.0
      Center Midfielder
      94953.jpg
      0.00
      0.00
    
    
      4
      abdelhamid-el-kaoutari
      17.03.1990
      180.0
      73.0
      Center Back
      124913.jpg
      0.25
      0.25



In [20]:

    
# Labels for the weight quantile bins, from the lowest bin to the highest.
weight_categories = [
    "vlow_weight",
    "low_weight",
    "mid_weight",
    "high_weight",
    "vhigh_weight",
]

# Cut `weight` into as many quantile bins as there are labels and store the
# resulting label for each player in a new 'weightclass' column.
players['weightclass'] = pd.qcut(
    players['weight'],
    q=len(weight_categories),
    labels=weight_categories,
)



In [21]:

    
# Preview the player table again, now including the weightclass column.
players.head()









    Out[21]:







  
    
      
      playerShort
      birthday
      height
      weight
      position
      photoID
      rater1
      rater2
      weightclass
    
  
  
    
      0
      aaron-hughes
      08.11.1979
      182.0
      71.0
      Center Back
      3868.jpg
      0.25
      0.00
      low_weight
    
    
      1
      aaron-hunt
      04.09.1986
      183.0
      73.0
      Attacking Midfielder
      20136.jpg
      0.00
      0.25
      low_weight
    
    
      2
      aaron-lennon
      16.04.1987
      165.0
      63.0
      Right Midfielder
      13515.jpg
      0.25
      0.25
      vlow_weight
    
    
      3
      aaron-ramsey
      26.12.1990
      178.0
      76.0
      Center Midfielder
      94953.jpg
      0.00
      0.00
      mid_weight
    
    
      4
      abdelhamid-el-kaoutari
      17.03.1990
      180.0
      73.0
      Center Back
      124913.jpg
      0.25
      0.25
      low_weight

BQPlot

The examples here are shamelessly borrowed from Maarten Breddels' amazing JupyterCon 2017 notebook: https://github.com/maartenbreddels/jupytercon-2017/blob/master/jupytercon2017-widgets.ipynb



In [22]:

    
from IPython.display import YouTubeVideo



In [22]:

    
# Embed the YouTube video with this ID in the cell output at 560x315.
YouTubeVideo("uHPcshgTotE", width=560, height=315)









    Out[22]:



In [23]:

    
# mixed feelings about this import
# NOTE: this rebinds `plt`, which until now referred to matplotlib.pyplot, to
# bqplot's pyplot module. Re-running the matplotlib cells above after this
# point will call bqplot instead.
import bqplot.pyplot as plt
import numpy as np



In [24]:

    
# 50 evenly spaced sample points on [0, 2] and their squares.
x = np.linspace(start=0, stop=2, num=50)
y = np.square(x)



In [25]:

    
# Create a bqplot figure (`plt` is bqplot.pyplot here), add a scatter of (x, y) and show it.
fig = plt.figure()
scatter = plt.scatter(x, y)
plt.show()



In [27]:

    
# Set the figure's animation_duration to 500, then replace the scatter's y data with x**0.5.
# NOTE(review): presumably the displayed plot animates to the new values over that duration — confirm.
fig.animation_duration = 500
scatter.y = x**.5



In [28]:

    
# Style applied to selected points (red stroke, orange fill), then add a brush selector.
# NOTE(review): presumably the brush lets points be selected by dragging on the plot — confirm.
scatter.selected_style = {'stroke':'red', 'fill': 'orange'}
plt.brush_selector();



In [30]:

    
# Read back the indices of the points currently selected in the scatter.
scatter.selected









    Out[30]:





[31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43]



In [34]:

    
# Set the selection from code by assigning a list of point indices.
scatter.selected = [1,2,10,42,45]

ipyvolume



In [35]:

    
import ipyvolume as ipv



In [ ]:



In [36]:

    
# Seed NumPy's global generator so the random point cloud is identical on
# every Restart & Run All.
SEED = 42
np.random.seed(SEED)

N = 1000  # number of points
# Three coordinate arrays of N uniform samples in [0, 1) each.
x, y, z = np.random.random((3, N))



In [37]:

    
# Create an ipyvolume figure, add a scatter of the (x, y, z) points with 'box' markers, and show it.
fig = ipv.figure()
scatter = ipv.scatter(x, y, z, marker='box')
ipv.show()



In [54]:

    
# Shift every x coordinate by one shared random offset drawn from [-0.5, 0.5).
scatter.x = x + np.random.rand() - 0.5



In [58]:

    
# Set the scatter's color to green and its marker size to 5.
scatter.color = "green"
scatter.size = 5



In [56]:

    
# Assign an (N, 3) array of random values as colors, one row per point.
scatter.color = np.random.random((N,3))



In [57]:

    
# Set the marker size to 2.
scatter.size = 2



In [59]:

    
# Fetch ipyvolume's animated_stream example dataset and keep its `.data` array.
ex = ipv.datasets.animated_stream.fetch().data



In [60]:

    
# Inspect the dimensions of the stream data array.
ex.shape









    Out[60]:





(6, 200, 1250)



In [61]:

    
# Shape after keeping only every 4th element along the last axis.
ex[:, ::, ::4].shape









    Out[61]:





(6, 200, 313)



In [62]:

    
# Animated quiver plot of the stream data, drawn with the 'dark' style.
ipv.figure()
ipv.style.use('dark')
# Reuse the array already loaded into `ex` rather than fetching the dataset a
# second time; only every 4th element along the last axis is plotted.
quiver = ipv.quiver(*ex[:, :, ::4], size=5)
ipv.animation_control(quiver, interval=200)
ipv.show()
# Switch the style back to 'light' once this figure has been shown.
ipv.style.use('light')



In [63]:

    
# Set the quiver's `geo` attribute to "cat".
# NOTE(review): presumably this swaps the marker geometry of the displayed plot — confirm.
quiver.geo = "cat"



In [68]:

    
# N = 1000*1000
N = 1000

# Three float32 coordinate arrays holding N uniform random samples each.
x, y, z = np.random.random(size=(3, N)).astype(np.float32)



In [69]:

    
# Plot the points as size-0.2 markers in a new ipyvolume figure and show it.
ipv.figure()
s = ipv.scatter(x, y, z, size=0.2)
ipv.show()



In [70]:

    
# Save the ipyvolume output to an HTML file in the working directory.
ipv.save("3d-example-plot.html")



In [71]:

    
# Open the exported plot in the default browser. The `open` shell command only
# exists on macOS; the standard-library webbrowser module is cross-platform.
import webbrowser
from pathlib import Path

# Pass an absolute file:// URI so the browser does not have to guess at a relative path.
webbrowser.open(Path("3d-example-plot.html").resolve().as_uri());



In [72]:

    
# NOTE(review): `name` is not used by any other cell visible in this notebook.
name = "Billy"



In [ ]:

	CRIM	ZN	INDUS	NOX	RM	AGE	DIS	RAD	TAX	PTRATIO	B	LSTAT	MEDV	Zone
0	0.00632	18.0	2.31	0.538	6.575	65.2	4.0900	1.0	296.0	15.3	396.90	4.98	24.0	18.0
1	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2.0	242.0	17.8	396.90	9.14	21.6	0.0
2	0.02729	0.0	7.07	0.469	7.185	61.1	4.9671	2.0	242.0	17.8	392.83	4.03	34.7	0.0
3	0.03237	0.0	2.18	0.458	6.998	45.8	6.0622	3.0	222.0	18.7	394.63	2.94	33.4	0.0
4	0.06905	0.0	2.18	0.458	7.147	54.2	6.0622	3.0	222.0	18.7	396.90	5.33	36.2	0.0

	MSHA_ID	Average_Employees	Company_Type	Labor_Hours	Mine_Basin	Mine_County	Mine_Name	Mine_State	Mine_Status	Mine_Type	Operating_Company	Operating_Company_Address	Operation_Type	Production_short_tons	Union_Code	Year
0	103295	18.0	Independent Producer Operator	39175.0	Appalachia Southern	Bibb	Seymour Mine	Alabama	Active	Surface	Hope Coal Company Inc	P.O. Box 249, Maylene, AL 35114	Mine only	105082.0	NaN	2008
1	103117	19.0	Operating Subsidiary	29926.0	Appalachia Southern	Cullman	Mine #2, #3, #4	Alabama	Active, men working, not producing	Surface	Twin Pines Coal Company Inc	1874 County Road 15, Bremen, AL 35033	Mine only	10419.0	NaN	2008
2	103361	20.0	Operating Subsidiary	42542.0	Appalachia Southern	Cullman	Cold Springs West Mine	Alabama	Active	Surface	Twin Pines Coal Company	74 Industrial Parkway, Jasper, AL 35502	Mine only	143208.0	NaN	2008
3	100759	395.0	Operating Subsidiary	890710.0	Appalachia Southern	Fayette	North River # 1 Underground Mi	Alabama	Active	Underground	Chevron Mining Inc	3114 County Road 63 S, Berry, AL 35546	Mine and Preparation Plant	2923261.0	United Mine Workers of America	2008
4	103246	22.0	Independent Producer Operator	55403.0	Appalachia Southern	Franklin	Bear Creek	Alabama	Active	Surface	Birmingham Coal & Coke Co., In	912 Edenton Street, Birmingham, AL 35242	Mine only	183137.0	NaN	2008

	playerShort	birthday	height	weight	position	photoID	rater1	rater2
0	aaron-hughes	08.11.1979	182.0	71.0	Center Back	3868.jpg	0.25	0.00
1	aaron-hunt	04.09.1986	183.0	73.0	Attacking Midfielder	20136.jpg	0.00	0.25
2	aaron-lennon	16.04.1987	165.0	63.0	Right Midfielder	13515.jpg	0.25	0.25
3	aaron-ramsey	26.12.1990	178.0	76.0	Center Midfielder	94953.jpg	0.00	0.00
4	abdelhamid-el-kaoutari	17.03.1990	180.0	73.0	Center Back	124913.jpg	0.25	0.25