Make sure vaex works.
In [1]:
import pandas as pd
import vaex as vx
%matplotlib inline
Now load the SDSS DR7 photometry data.
In [2]:
# Gzipped CSV that load_data() reads with pandas. The path is relative, so it
# resolves against the directory the notebook kernel is running in.
DATA_PATH = '../../../data/sdss_dr7_photometry_source.csv.gz'
In [3]:
def load_data(data_cols=('ra', 'dec', 'redshift'),
              filter_col='class',
              filter_val='Galaxy',
              path=None,
              chunksize=100000):
    """Read selected columns of a CSV file in chunks, optionally filtering rows.

    The file is streamed chunk by chunk so that rows failing the filter are
    dropped before the chunks are concatenated, instead of loading the whole
    file into memory first.

    Parameters
    ----------
    data_cols : sequence of str
        Columns to read from the file.
    filter_col : str or None
        Column used for row filtering. It is read in addition to ``data_cols``
        and is kept in the returned frame. Pass ``None`` to disable filtering,
        in which case only ``data_cols`` are read.
    filter_val : object
        Rows are kept when ``chunk[filter_col] == filter_val``.
    path : str or path-like, optional
        File to read. Defaults to the notebook-level ``DATA_PATH``.
    chunksize : int
        Number of rows per chunk handed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The concatenated (and, if requested, filtered) chunks. Row labels are
        the original row positions in the file; they are not reset.
    """
    if path is None:
        path = DATA_PATH

    # Build the column list only from real column names: appending a None
    # filter_col to usecols would make read_csv fail, which previously broke
    # the "filtering disabled" mode.
    columns = list(data_cols)
    if filter_col is not None and filter_col not in columns:
        columns.append(filter_col)

    reader = pd.read_csv(
        path,
        iterator=True,
        chunksize=chunksize,
        usecols=columns)
    try:
        if filter_col is None:
            # Filtering disabled: keep every row of every chunk.
            chunks = reader
        else:
            # Drop non-matching rows chunk by chunk to bound memory use.
            chunks = (chunk[chunk[filter_col] == filter_val]
                      for chunk in reader)
        return pd.concat(chunks)
    finally:
        # Release the underlying file handle even if parsing fails midway.
        reader.close()
# Load with the defaults: the ra, dec and redshift columns, keeping only rows
# whose 'class' column equals 'Galaxy'.
data = load_data()
In [4]:
# Preview the first rows as a quick sanity check of the load.
data.head()
Out[4]:
In [5]:
# Hand the pandas DataFrame to vaex, naming the resulting dataset 'sdss'.
dataset = vx.from_pandas(data, name='sdss')
In [6]:
# Plot over ra (x) and dec (y) with what='mean(redshift)' -- presumably each
# bin is coloured by the mean redshift of the rows falling in it.
dataset.plot('ra', 'dec', what='mean(redshift)', figsize=(12,8))
Out[6]:
In [7]:
# Add a healpix column to the dataset using the library defaults; it is read
# back below as dataset.col.healpix.
dataset.add_column_healpix()
In [8]:
# Healpix plot of what='mean(redshift)', binned on the dataset's healpix column.
# NOTE(review): add_column_healpix() was called with defaults, which presumably
# derive the index from ra/dec (equatorial coordinates). Declaring
# healpix_input='galactic' here may skip a needed coordinate conversion --
# confirm against the vaex documentation.
dataset.healpix_plot(dataset.col.healpix, what='mean(redshift)',
show=True, healpix_input='galactic', healpix_output='galactic', figsize=(12,8))
In [ ]:
In [ ]: