In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
import numpy as np
import SDSS
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import copy
In [3]:
# We want to select galaxies, and then are only interested in their positions on the sky.
data = pd.read_csv("downloads/SDSSobjects.csv",usecols=['ra','dec','u','g',\
'r','i','size'])
# Filter out objects with bad magnitude or size measurements:
data = data[(data['u'] > 0) & (data['g'] > 0) & (data['r'] > 0) & (data['i'] > 0) & (data['size'] > 0)]
# Make size cuts, to exclude stars and nearby galaxies, and magnitude cuts, to get good galaxy detections:
data = data[(data['size'] > 0.8) & (data['size'] < 10.0) & (data['i'] > 17) & (data['i'] < 22)]
# Drop the things we're not so interested in:
del data['u'], data['g'], data['r'], data['i'],data['size']
data.head()
Out[3]:
In [4]:
Ngals = len(data)
ramin,ramax = np.min(data['ra']),np.max(data['ra'])
decmin,decmax = np.min(data['dec']),np.max(data['dec'])
print Ngals,"galaxy-like objects in (ra,dec) range (",ramin,":",ramax,",",decmin,":",decmax,")"
TreeCorr
code (Jarvis et al 2004) to compute this correlation function estimator efficiently. You can read more about better estimators starting from the TreeCorr wiki.
In [5]:
# !pip install --upgrade TreeCorr
In [6]:
random = pd.DataFrame({'ra' : ramin + (ramax-ramin)*np.random.rand(Ngals), 'dec' : decmin + (decmax-decmin)*np.random.rand(Ngals)})
In [7]:
print len(random), type(random)
Now let's plot both catalogs, and compare.
In [8]:
fig, ax = plt.subplots(nrows=1, ncols=2)
fig.set_size_inches(15, 6)
plt.subplots_adjust(wspace=0.2)
random.plot(kind='scatter', x='ra', y='dec', ax=ax[0], title='Random')
ax[0].set_xlabel('RA / deg')
ax[0].set_ylabel('Dec. / deg')
data.plot(kind='scatter', x='ra', y='dec', ax=ax[1], title='Data')
ax[1].set_xlabel('RA / deg')
ax[1].set_ylabel('Dec. / deg')
Out[8]:
In [9]:
import treecorr
random_cat = treecorr.Catalog(ra=random['ra'], dec=random['dec'], ra_units='deg', dec_units='deg')
data_cat = treecorr.Catalog(ra=data['ra'], dec=data['dec'], ra_units='deg', dec_units='deg')
# Set up some correlation function estimator objects:
sep_units='arcmin'
min_sep=0.5
max_sep=10.0
N = 7
bin_size = np.log10(1.0*max_sep/min_sep)/(1.0*N)
dd = treecorr.NNCorrelation(bin_size=bin_size, min_sep=min_sep, max_sep=max_sep, sep_units=sep_units, bin_slop=0.05/bin_size)
rr = treecorr.NNCorrelation(bin_size=bin_size, min_sep=min_sep, max_sep=max_sep, sep_units=sep_units, bin_slop=0.05/bin_size)
# Process the data:
dd.process(data_cat)
rr.process(random_cat)
# Combine into a correlation function and its variance:
xi, varxi = dd.calculateXi(rr)
In [10]:
plt.figure(figsize=(15,8))
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.errorbar(np.exp(dd.logr),xi,np.sqrt(varxi),c='blue',linewidth=2)
# plt.xscale('log')
plt.xlabel('$\\theta / {\\rm arcmin}$',fontsize=20)
plt.ylabel('$\\xi(\\theta)$',fontsize=20)
plt.ylim([-0.1,0.2])
plt.grid(True)
In [ ]: