Make sure vaex works.
In [1]:
    
import pandas as pd
import vaex as vx
%matplotlib inline
    
Now for SDSS data
In [2]:
    
# Location of the gzip-compressed CSV that load_data() reads.
# The path is relative, so it resolves against the notebook's working directory.
DATA_PATH = '../../../data/sdss_dr7_photometry_source.csv.gz'
    
In [3]:
    
def load_data(data_cols=('ra', 'dec', 'redshift'),
              filter_col='class',
              filter_val='Galaxy',
              path=None,
              chunksize=100000):
    """Read selected columns of a CSV file in chunks, optionally filtering rows.

    The file is read chunk by chunk so that rows rejected by the filter are
    dropped before the whole file is held in memory.

    Parameters
    ----------
    data_cols : iterable of str
        Names of the columns to load.
    filter_col : str or None
        Column compared against ``filter_val``. It is loaded in addition to
        ``data_cols`` and is kept in the returned frame. Pass ``None`` to
        disable filtering and load every row.
    filter_val : object
        Only rows where ``filter_col == filter_val`` are kept.
    path : str or None
        CSV file to read. ``None`` (the default) uses the module-level
        ``DATA_PATH``.
    chunksize : int
        Number of rows read per chunk.

    Returns
    -------
    pandas.DataFrame
        The selected columns (plus ``filter_col`` when filtering is enabled),
        with the row index of the original file preserved.
    """
    if path is None:
        path = DATA_PATH

    # Only request the filter column when filtering is enabled: passing
    # ``None`` inside ``usecols`` makes pandas raise a ValueError.
    usecols = list(data_cols)
    if filter_col is not None and filter_col not in usecols:
        usecols.append(filter_col)

    chunks = pd.read_csv(
        path,
        iterator=True,
        chunksize=chunksize,
        usecols=usecols)

    if filter_col is None:
        # Filtering disabled: keep every row.
        return pd.concat(chunks)

    # Drop non-matching rows chunk by chunk, before concatenating.
    return pd.concat([chunk[chunk[filter_col] == filter_val]
                      for chunk in chunks])
# Load with the defaults: ra/dec/redshift for rows whose 'class' column equals 'Galaxy'.
data = load_data()
    
In [4]:
    
# Show the first few rows as a quick sanity check of the loaded columns.
data.head()
    
    Out[4]:
In [5]:
    
# Hand the pandas DataFrame to vaex as a dataset named 'sdss'; the plotting cells below use it.
dataset = vx.from_pandas(data, name='sdss')
    
In [6]:
    
# Plot ra against dec. `what='mean(redshift)'` selects the statistic vaex shows
# (presumably the mean redshift per bin — confirm against the vaex docs).
dataset.plot('ra', 'dec', what='mean(redshift)', figsize=(12,8))
    
    Out[6]:
    
In [7]:
    
# Add a healpix index column using vaex's default arguments; the next cell reads it
# as `dataset.col.healpix`.
# NOTE(review): the source coordinate columns and healpix order are whatever vaex
# defaults to here — confirm they match this data (ra/dec in degrees).
dataset.add_column_healpix()
    
In [8]:
    
# Sky map of mean(redshift) over the healpix column added in the previous cell.
# NOTE(review): healpix_input='galactic' declares the healpix indices to be in galactic
# coordinates. If add_column_healpix() built them from ra/dec (equatorial), this should
# probably be healpix_input='equatorial' — confirm against the vaex healpix_plot docs.
dataset.healpix_plot(dataset.col.healpix, what='mean(redshift)',
                     show=True, healpix_input='galactic', healpix_output='galactic', figsize=(12,8))
    
    
    
In [ ]:
    
    
In [ ]: