How to plot millions of points?


In [ ]:
import datashader as ds
import datashader.transfer_functions as tf
import dask.dataframe as dd
import numpy as np
from astroML.plotting import scatter_contour

In [ ]:
### Data input: 5 million Sloan (SDSS) objects

In [ ]:
from astropy.table import Table

In [ ]:
# Read the SDSS colour catalogue from its FITS file and convert the
# astropy Table into a pandas DataFrame for the cells that follow.
sdss = (
    Table.read('data/sdss_5M_gmr_rmi.fits')
    .to_pandas()
)

In [ ]:
# Peek at the first rows to confirm which columns were loaded.
sdss.head()

In [ ]:
# Row count of the catalogue before the colour cut in the next cell.
len(sdss)

In [ ]:
# Colour cut: keep only rows where both g-r and r-i lie strictly inside
# (-5, 5). Re-running this cell is harmless because the cut is idempotent.
sdss = sdss.loc[
    (sdss['g_minus_r'].abs() < 5) & (sdss['r_minus_i'].abs() < 5)
]

Scatter-contour plot with astroML


In [ ]:
from astroML.plotting import scatter_contour
import matplotlib.pyplot as plt

In [ ]:
#scatter_contour?

In [ ]:
#------------------------------------------------------------
# plot the results
%matplotlib inline
#fig, ax = plt.subplots(figsize=(5, 3.75))
fig, ax = plt.subplots(figsize=(10, 7.5))
scatter_contour(sdss['g_minus_r'], sdss['r_minus_i'], threshold=200, log_counts=True, ax=ax,
                histogram2d_args=dict(bins=100),
                plot_args=dict(marker=',', linestyle='none', color='black'),
                contour_args=dict(cmap=plt.cm.bone))

ax.set_xlabel(r'${\rm g - r}$')
ax.set_ylabel(r'${\rm r - i}$')

ax.set_xlim(-0.6, 3.0)
ax.set_ylim(-0.6, 2.5)

plt.show()

A nicer-looking example


In [ ]:
# Author: Jake VanderPlas
# License: BSD
#   The figure produced by this code is published in the textbook
#   "Statistics, Data Mining, and Machine Learning in Astronomy" (2013)
#   For more information, see http://astroML.github.com
#   To report a bug or issue, use the following forum:
#    https://groups.google.com/forum/#!forum/astroml-general
from matplotlib import pyplot as plt

from astroML.plotting import scatter_contour
from astroML.datasets import fetch_sdss_S82standards

In [ ]:
#----------------------------------------------------------------------
# Apply the matplotlib settings used throughout the astroML textbook.
# With usetex=True all text is rendered through LaTeX, which may raise an
# error when LaTeX is not installed; pass usetex=False in that case.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Fetch the Stripe 82 standard star catalog and pull out the g, r and i
# columns that the colour-colour plot below is built from.
data = fetch_sdss_S82standards()

g, r, i = (data[column] for column in ('mmu_g', 'mmu_r', 'mmu_i'))

In [ ]:
# Number of entries returned by fetch_sdss_S82standards().
len(data)

In [ ]:
#------------------------------------------------------------
# plot the results
%matplotlib inline
#fig, ax = plt.subplots(figsize=(5, 3.75))
fig, ax = plt.subplots(figsize=(10, 7.5))
scatter_contour(g - r, r - i, threshold=200, log_counts=True, ax=ax,
                histogram2d_args=dict(bins=40),
                plot_args=dict(marker=',', linestyle='none', color='black'),
                contour_args=dict(cmap=plt.cm.bone))

ax.set_xlabel(r'${\rm g - r}$')
ax.set_ylabel(r'${\rm r - i}$')

ax.set_xlim(-0.6, 2.5)
ax.set_ylim(-0.6, 2.5)

plt.show()

Experiments with datashader


In [ ]:
%%time
cvs = ds.Canvas(600, 600, (-1, 3), (-1, 2.5))
agg = cvs.points(sdss, 'g_minus_r', 'r_minus_i')

In [ ]:
# Global style switch read by the helpers defined below: export() paints a
# black background onto saved images when it is true, and cm() uses its
# negation as the default for `reverse`.
black_background = True

# Optional notebook-width tweak, left disabled:
#from IPython.core.display import HTML, display
#display(HTML("<style>.container { width:50% !important; }</style>"))

In [ ]:
def export(img,filename,fmt=".png",_return=True):
    """
    Write a datashader Image to the file ``filename + fmt``.

    When the module-level ``black_background`` flag is true, the image is
    first passed through ``tf.set_background(img, "black")`` so that the
    exported file gets a forced black background instead of transparency.

    Returns the image that was saved (after any background change) when
    ``_return`` is true, otherwise ``None``.
    """
    rendered = tf.set_background(img, "black") if black_background else img
    rendered.to_pil().save(filename + fmt)
    if not _return:
        return None
    return rendered

def cm(base_colormap, start=0, end=1.0, reverse=None):
    """
    Given a colormap in the form of a list, such as a Bokeh palette,
    return a version of the colormap reversed if requested, and selecting
    a subset (on a scale 0,1.0) of the elements in the colormap list.

    Parameters
    ----------
    base_colormap : list
        The colours, in order.
    start, end : float
        Fractions of the palette length; the returned slice is
        ``[int(start * n):int(end * n)]`` with ``n = len(base_colormap)``.
    reverse : bool or None
        Reverse the palette before slicing. ``None`` (the default) means
        ``not black_background``, looked up each time the function is
        called, so that changing ``black_background`` in another cell
        takes effect without re-running this definition.

    For instance:

    >>> cmap = ["#000000", "#969696", "#d9d9d9", "#ffffff"]
    >>> cm(cmap,reverse=True)
    ['#ffffff', '#d9d9d9', '#969696', '#000000']
    >>> cm(cmap,0.3,reverse=True)
    ['#d9d9d9', '#969696', '#000000']
    """
    # A default of `not black_background` in the signature would be frozen
    # at definition time; resolve it here so it tracks the current flag.
    if reverse is None:
        reverse = not black_background
    full = list(reversed(base_colormap)) if reverse else base_colormap
    num = len(full)
    return full[int(start*num):int(end*num)]

from datashader.colors import Greys9, Hot, viridis, inferno

In [ ]:
#export(tf.interpolate(agg, cmap=cm(viridis), how='eq_hist'),"gmr_rmi_eq_hist.png")

In [ ]:
# Initial axis extents handed to bp.figure() inside base_plot().
x_range = (-1, 3)
y_range = (-1, 3)

In [ ]:
import bokeh.plotting as bp

# Render Bokeh plots inline in the notebook; swap in the commented-out
# output_file() call below to write a standalone HTML page instead.
bp.output_notebook()
#bp.output_file('sdss_color_color_datashader.html')

def base_plot(tools='pan,wheel_zoom,box_zoom,reset',webgl=False):
    """
    Create the empty 400 x 400 Bokeh figure used as the drawing surface.

    The figure takes its initial axis extents from the module-level
    ``x_range`` / ``y_range``, has no outline, 10-pixel minimum borders on
    every side, visible axes and gray grid lines.

    Parameters
    ----------
    tools : str
        Comma-separated Bokeh tool names forwarded to ``bp.figure``.
    webgl : bool
        Forwarded unchanged to ``bp.figure(webgl=...)``.

    Returns the configured figure.

    NOTE(review): ``plot_width``/``plot_height``, ``webgl`` and
    ``responsive`` look like options from older Bokeh releases -- confirm
    they are still accepted by the Bokeh version this notebook runs on.
    """
    figure_options = dict(
        tools=tools,
        plot_width=400, plot_height=400,
        x_range=x_range, y_range=y_range,
        outline_line_color=None,
        min_border=10,
        min_border_left=10, min_border_right=10,
        min_border_top=10, min_border_bottom=10,
        webgl=webgl
    )
    p = bp.figure(**figure_options)

    p.axis.visible = True
    for grid in (p.xgrid, p.ygrid):
        grid.grid_line_color = 'gray'
    p.responsive = True

    return p

In [ ]:
#InteractiveImage?

In [ ]:
from datashader.callbacks import InteractiveImage

def image_callback2(x_range, y_range, w, h):
    """
    Re-render the SDSS colour-colour image for the given viewport.

    Builds a ``w`` x ``h`` datashader canvas over ``x_range`` / ``y_range``,
    aggregates the module-level ``sdss`` frame on the 'g_minus_r' and
    'r_minus_i' columns, colours the aggregate with the reversed ``Greys9``
    palette and returns the result of ``tf.dynspread`` (threshold 0.75,
    at most 12 px).
    """
    canvas = ds.Canvas(plot_width=w, plot_height=h,
                       x_range=x_range, y_range=y_range)
    counts = canvas.points(sdss, 'g_minus_r', 'r_minus_i')
    reversed_greys = list(reversed(Greys9))
    shaded = tf.interpolate(counts, cmap=reversed_greys)
    return tf.dynspread(shaded, threshold=0.75, max_px=12)
    # Fixed-size spreading alternatives that were tried instead of dynspread:
    #return tf.spread(shaded, mask=mask, how='over', px=5)
    #return tf.spread(shaded, how='over', px=3)

# Build the Bokeh figure with WebGL explicitly off.
p = base_plot(webgl=False)
# Hook image_callback2 up to the figure; InteractiveImage is the cell's last
# expression, so its result is what the notebook displays.
# NOTE(review): throttle=500 is presumably a delay in milliseconds between
# re-renders -- confirm against the datashader documentation.
InteractiveImage(p, image_callback2, throttle=500)

In [ ]:


In [ ]:


In [ ]: