In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from scipy.special import erf, logsumexp
from scipy.stats import maxwell
import astropy.coordinates as coord
import astropy.units as u
import gary.coordinates as gc
import emcee
import triangle
In [31]:
halo_dispersion = 106. # km/s
rrl_vr_err = 15. # km/s
mgiant_vr_err = 2. # km/s

def ln_normal(x, mu, var):
    # log-density of a normal with mean mu and *variance* var
    return -0.5*np.log(2*np.pi) - 0.5*np.log(var) - 0.5 * (x-mu)**2 / var
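A quick sanity check (my addition, not in the original notebook): ln_normal takes the variance rather than the standard deviation, so it should agree with scipy.stats.norm.logpdf when scale = sqrt(var).
In [ ]:
from scipy.stats import norm
# ln_normal takes the variance; norm.logpdf takes the standard deviation
assert np.allclose(ln_normal(1.2, 0., 4.), norm.logpdf(1.2, loc=0., scale=2.))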
First I'll fit a velocity model to the M-giants to derive the gradient and dispersion of the TriAnd population.
In [32]:
mgiants = np.genfromtxt("/Users/adrian/projects/triand-rrlyrae/data/triand_giants.txt", names=True)
mgiants.dtype.names
Out[32]:
In [33]:
plt.plot(mgiants['l'], mgiants['vr'], linestyle='none', marker='o')
plt.ylim(-300,300)
Out[33]:
In [34]:
# mixture model - f_ol is outlier fraction
def ln_prior(p):
    m, b, sigma, halo_v, f_ol = p
    if m > 0. or m < -50:
        return -np.inf
    if b < 0 or b > 500:
        return -np.inf
    if sigma <= 0.:
        return -np.inf
    if f_ol > 1. or f_ol < 0.:
        return -np.inf
    if halo_v > 100 or halo_v < -100:
        return -np.inf
    # scale-invariant prior on the dispersion, p(sigma) ~ 1/sigma
    return -np.log(sigma)

def Mg_likelihood(p, l, vr, sigma_vr):
    m, b, sigma, halo_v, f_ol = p
    V = sigma_vr**2 + sigma**2
    term1 = ln_normal(vr, m*l + b, V)  # TriAnd component
    term2 = ln_normal(vr, halo_v, halo_dispersion**2)  # halo (outlier) component
    return np.array([term1, term2])

def Mg_ln_likelihood(p, *args):
    m, b, sigma, halo_v, f_ol = p
    x = Mg_likelihood(p, *args)
    # mixture weights
    w = np.zeros_like(x)
    w[0] = 1 - f_ol
    w[1] = f_ol
    return logsumexp(x, b=w, axis=0)

def Mg_ln_posterior(p, *args):
    lnp = ln_prior(p)
    if np.isinf(lnp):
        return -np.inf
    return lnp + Mg_ln_likelihood(p, *args).sum()

def outlier_prob(p, *args):
    m, b, sigma, halo_v, f_ol = p
    p1, p2 = Mg_likelihood(p, *args)
    return f_ol*np.exp(p2) / ((1-f_ol)*np.exp(p1) + f_ol*np.exp(p2))
In [35]:
nwalkers = 32
Mg_sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=5, lnpostfn=Mg_ln_posterior,
                                   args=(mgiants['l'], mgiants['vr'], mgiant_vr_err))
In [36]:
# initialize walkers in a small ball around a reasonable guess
p0 = np.zeros((nwalkers, Mg_sampler.dim))
p0[:,0] = np.random.normal(-1, 0.1, size=nwalkers)
p0[:,1] = np.random.normal(150, 0.1, size=nwalkers)
p0[:,2] = np.random.normal(25, 0.5, size=nwalkers)
p0[:,3] = np.random.normal(0., 10., size=nwalkers)
p0[:,4] = np.random.normal(0.1, 0.01, size=nwalkers)

# check that every walker starts at a finite posterior value
for pp in p0:
    lnp = Mg_ln_posterior(pp, *Mg_sampler.args)
    if not np.isfinite(lnp):
        print("bad initial walker position: {}".format(pp))
In [37]:
pos,prob,state = Mg_sampler.run_mcmc(p0, N=100)  # burn-in
Mg_sampler.reset()
pos,prob,state = Mg_sampler.run_mcmc(pos, N=1000)  # production run
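A quick convergence diagnostic (my addition): for emcee's affine-invariant ensemble sampler, a mean acceptance fraction roughly in the 0.2 to 0.5 range is generally considered healthy.
In [ ]:
# mean acceptance fraction across all walkers
print(Mg_sampler.acceptance_fraction.mean())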
In [38]:
fig = triangle.corner(Mg_sampler.flatchain,
labels=[r'$\mathrm{d}v/\mathrm{d}l$', r'$v_0$', r'$\sigma_v$', r'$v_{\rm halo}$', r'$f_{\rm halo}$'])
In [39]:
map_dvdl,map_v0,map_sigma_triand,map_vhalo,map_fhalo = Mg_sampler.flatchain[Mg_sampler.flatlnprobability.argmax()]
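With the MAP parameters in hand we can also evaluate the per-star halo (outlier) probability with the outlier_prob function defined above; a minimal sketch (my addition), using the same M-giant data arrays:
In [ ]:
map_p = [map_dvdl, map_v0, map_sigma_triand, map_vhalo, map_fhalo]
p_halo = outlier_prob(map_p, mgiants['l'], mgiants['vr'], mgiant_vr_err)
# color each M-giant by its probability of belonging to the halo component
plt.scatter(mgiants['l'], mgiants['vr'], c=p_halo, vmin=0, vmax=1, cmap='coolwarm')
plt.colorbar(label='halo probability')
plt.ylim(-300, 300)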
From this fit, we can run a second mixture model and ask what fraction of our RR Lyrae could belong to the same velocity-defined population as the M-giants.
In [19]:
# rrl = np.genfromtxt("/Users/adrian/projects/triand-rrlyrae/data/RRL_ALL.txt", names=True, dtype=None)
rrl = np.genfromtxt("/Users/adrian/projects/triand-rrlyrae/data/TriAnd_RRL_19mar15.csv",
                    dtype=None, names=True, delimiter=',')
print(rrl.dtype.names)
In [25]:
c = coord.SkyCoord(ra=rrl['ra']*u.deg, dec=rrl['dec']*u.deg)
rrl_l = c.galactic.l.degree
rrl_vgsr = gc.vhel_to_vgsr(c, rrl['Vsys']*u.km/u.s,
                           vcirc=236.*u.km/u.s,
                           vlsr=[11.1,12.24,7.25]*u.km/u.s).value # same as Sheffield et al. 2014
verr = rrl['Err']
The likelihood for the RR Lyrae is as follows.
We'll fix the mean and dispersion of the halo: $$ (\mu_{\rm h}, \sigma_{\rm h}) = (0, 106)~{\rm km~s}^{-1} $$
$f_{\rm h}$ is the fraction of halo stars in the sample of RR Lyrae.
We'll infer the halo fraction using the likelihood: $$ p(D, v \,|\, f_{\rm h}) = p(D \,|\, v) \, p(v \,|\, f_{\rm h}) $$ $$ p(v \,|\, f_{\rm h}) = f_{\rm h}\,p(v \,|\, \mu_{\rm h}, \sigma_{\rm h}) + (1-f_{\rm h})\,p(v \,|\, v_{0,{\rm TriAnd}}, {\rm d}v/{\rm d}l, \sigma_{\rm TriAnd}) $$
Because the velocities are measured with error, we instead use the marginal likelihood: $$ p(D \,|\, f_{\rm h}) = \int p(D, v \,|\, f_{\rm h})\,{\rm d}v $$
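Both factors are Gaussian, so the marginalization has a closed form: the measurement variance adds in quadrature to each component's intrinsic dispersion, which is exactly how the variances are combined in the code below: $$ \int \mathcal{N}(D \,|\, v, \sigma_D^2)\, \mathcal{N}(v \,|\, \mu, \sigma^2)\, {\rm d}v = \mathcal{N}(D \,|\, \mu, \sigma_D^2 + \sigma^2) $$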
In [21]:
mu_h,sigma_h = (0., 106.) # km/s - Cite W. Brown (2009)?
In [42]:
def rrl_ln_likelihood(p, v_obs, v_err, v0, dv_dl, sigma_triand, vhalo, l):
    f_h = p[0]
    V1 = v_err**2 + sigma_h**2  # halo: measurement + halo variance
    V2 = v_err**2 + sigma_triand**2  # TriAnd: measurement + TriAnd variance
    term1 = ln_normal(v_obs, vhalo, V1)
    term2 = ln_normal(v_obs, dv_dl*l + v0, V2)
    x = np.array([term1, term2])
    # mixture weights
    w = np.zeros_like(x)
    w[0] = f_h
    w[1] = 1 - f_h
    return logsumexp(x, b=w, axis=0)

def rrl_ln_prior(p):
    f_h = p[0]
    if f_h > 1. or f_h < 0.:
        return -np.inf
    return 0.

def rrl_ln_posterior(p, *args):
    lnp = rrl_ln_prior(p)
    if np.isinf(lnp):
        return -np.inf
    return lnp + rrl_ln_likelihood(p, *args).sum()
In [43]:
fig,axes = plt.subplots(1, 2, figsize=(12,6))

fs = np.linspace(0., 1., 100)
# for 100 posterior samples from the M-giant fit, evaluate the RR Lyrae
# likelihood over a grid of halo fractions
for k in np.random.randint(len(Mg_sampler.flatchain), size=100):
    dvdl, v0, sigmav, vhalo, fh = Mg_sampler.flatchain[k]
    ls = np.zeros_like(fs)
    for i, f in enumerate(fs):
        ls[i] = rrl_ln_likelihood([f], rrl_vgsr, verr, v0, dvdl, sigmav, vhalo, rrl_l).sum()
    axes[0].plot(1-fs, ls, marker=None, alpha=0.25, c='k')
    axes[1].plot(1-fs, np.exp(ls-ls.max()), marker=None, alpha=0.25, c='k')
In [44]:
nrrlyrae = 77.
nmgiants = 74.
In [46]:
fig,ax = plt.subplots(1, 1, figsize=(6,6))

fs = np.linspace(0., 1., 100)
for k in np.random.randint(len(Mg_sampler.flatchain), size=100):
    dvdl, v0, sigmav, vhalo, fh = Mg_sampler.flatchain[k]
    ls = np.zeros_like(fs)
    for i, f in enumerate(fs):
        ls[i] = rrl_ln_likelihood([f], rrl_vgsr, verr, v0, dvdl, sigmav, vhalo, rrl_l).sum()
    # rescale the TriAnd fraction of RR Lyrae to a number ratio relative to the
    # M-giants (the 0.8 is presumably a completeness correction to the RR Lyrae count)
    ax.plot((1-fs) * nrrlyrae/0.8 / nmgiants, np.exp(ls-ls.max()), marker=None, alpha=0.25, c='k')

ax.set_xlabel(r"$f_{\rm RR:MG}$", fontsize=24)
ax.set_xlim(0, 1)
Out[46]:
In [101]:
nwalkers = 32
sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=1, lnpostfn=rrl_ln_posterior,
                                args=(rrl_vgsr, rrl_vr_err, map_v0, map_dvdl, map_sigma_triand, map_vhalo, rrl_l))

p0 = np.zeros((nwalkers, sampler.dim))
p0[:,0] = np.random.normal(0.5, 0.01, size=nwalkers)
for pp in p0:
    lnp = rrl_ln_posterior(pp, *sampler.args)
    if not np.isfinite(lnp):
        print("bad initial walker position: {}".format(pp))
In [102]:
pos,prob,state = sampler.run_mcmc(p0, N=100)  # burn-in
sampler.reset()
pos,prob,state = sampler.run_mcmc(pos, N=1000)
In [103]:
fs = np.linspace(0., 1., 100)
ls = np.zeros_like(fs)
for i, f in enumerate(fs):
    ls[i] = rrl_ln_likelihood([f], rrl_vgsr, rrl_vr_err, map_v0, map_dvdl, map_sigma_triand, map_vhalo, rrl_l).sum()

fig,axes = plt.subplots(1, 2, figsize=(12,6))
axes[0].plot(fs, ls)
axes[1].plot(fs, np.exp(ls-ls.max()))
# axes[1].axvline(true_f_h)  # true_f_h is only defined for the mock data below
# axes[1].set_xlim(0.8,1)
Out[103]:
Here I'll generate some fake data, "observe" it with realistic velocity errors, and check how well we recover the true halo fraction.
In [17]:
np.random.seed(42)

N = 1000
true_f_h = 1.
N_h = int(true_f_h*N)
N_T = N - int(true_f_h*N)

# TriAnd mean velocity and dispersion for the mock data -- assumed values here,
# chosen to match the walker initialization used for the M-giant fit above
mu_T, sigma_T = 150., 25.  # km/s

true_v = np.append(np.random.normal(mu_h, sigma_h, size=N_h),
                   np.random.normal(mu_T, sigma_T, size=N_T))

# generate with the wrong distribution
# true_v = np.append(np.random.normal(mu_h, sigma_h-25, size=N_h),
#                    np.random.normal(mu_T, sigma_T, size=N_T))

print("N halo: {}, N TriAnd: {}".format(N_h, N_T))

v_err_18 = 15.
v_obs_18 = np.random.normal(true_v, v_err_18)
v_obs_18 = v_obs_18[:18]

v_err_179 = 15.
v_obs_179 = np.random.normal(true_v, v_err_179)
v_obs_179 = v_obs_179[:179]
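The mock samplers below need a one-parameter posterior that takes only the observed velocities and a scalar uncertainty; that function isn't defined in this version of the notebook, so here is a minimal reconstruction of the constant-mean mixture under the fixed $(\mu_{\rm h}, \sigma_{\rm h})$ and $(\mu_T, \sigma_T)$ above. The names mock_ln_likelihood and mock_ln_posterior are mine.
In [ ]:
def mock_ln_likelihood(p, v_obs, v_err):
    f_h = p[0]
    # constant-mean mixture: halo + TriAnd, measurement variance added in quadrature
    term1 = ln_normal(v_obs, mu_h, v_err**2 + sigma_h**2)
    term2 = ln_normal(v_obs, mu_T, v_err**2 + sigma_T**2)
    x = np.array([term1, term2])
    w = np.zeros_like(x)
    w[0] = f_h
    w[1] = 1 - f_h
    return logsumexp(x, b=w, axis=0)

def mock_ln_posterior(p, *args):
    lnp = rrl_ln_prior(p)  # same flat prior on f_h
    if np.isinf(lnp):
        return -np.inf
    return lnp + mock_ln_likelihood(p, *args).sum()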
In [18]:
nwalkers = 32
mock_sampler_1 = emcee.EnsembleSampler(nwalkers=nwalkers, dim=1, lnpostfn=mock_ln_posterior,
                                       args=(v_obs_18, v_err_18))

p0 = np.zeros((nwalkers, mock_sampler_1.dim))
p0[:,0] = np.random.normal(0.5, 0.01, size=nwalkers)
for pp in p0:
    lnp = mock_ln_posterior(pp, *mock_sampler_1.args)
    if not np.isfinite(lnp):
        print("bad initial walker position: {}".format(pp))

pos,prob,state = mock_sampler_1.run_mcmc(p0, N=100)  # burn-in
mock_sampler_1.reset()
pos,prob,state = mock_sampler_1.run_mcmc(pos, N=1000)
In [19]:
nwalkers = 32
mock_sampler_2 = emcee.EnsembleSampler(nwalkers=nwalkers, dim=1, lnpostfn=mock_ln_posterior,
                                       args=(v_obs_179, v_err_179))

p0 = np.zeros((nwalkers, mock_sampler_2.dim))
p0[:,0] = np.random.normal(0.5, 0.01, size=nwalkers)
for pp in p0:
    lnp = mock_ln_posterior(pp, *mock_sampler_2.args)
    if not np.isfinite(lnp):
        print("bad initial walker position: {}".format(pp))

pos,prob,state = mock_sampler_2.run_mcmc(p0, N=100)  # burn-in
mock_sampler_2.reset()
pos,prob,state = mock_sampler_2.run_mcmc(pos, N=1000)
In [20]:
len(rrl_vgsr)  # number of RR Lyrae with measured velocities
Out[20]:
In [21]:
fs = np.linspace(0., 1., 100)
ls = np.zeros_like(fs)
for i, f in enumerate(fs):
    ls[i] = mock_ln_likelihood([f], rrl_vgsr, 10.).sum()

fig,axes = plt.subplots(1, 2, figsize=(12,6))
axes[0].plot(fs, ls)
axes[1].plot(fs, np.exp(ls-ls.max()))
axes[1].axvline(true_f_h)
# axes[1].set_xlim(0.8,1)
Out[21]:
In [22]:
vr_err = 15. # km/s

nwalkers = 32
sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=1, lnpostfn=mock_ln_posterior,
                                args=(rrl_vgsr, vr_err))

p0 = np.zeros((nwalkers, sampler.dim))
p0[:,0] = np.random.normal(0.5, 0.01, size=nwalkers)
for pp in p0:
    lnp = mock_ln_posterior(pp, *sampler.args)
    if not np.isfinite(lnp):
        print("bad initial walker position: {}".format(pp))

pos,prob,state = sampler.run_mcmc(p0, N=100)  # burn-in
sampler.reset()
pos,prob,state = sampler.run_mcmc(pos, N=1000)
In [23]:
fig,axes = plt.subplots(1, 2, figsize=(14,6), sharex=True, sharey=True)

axes[0].hist((1-sampler.flatchain)*179, bins=np.linspace(0., 179., 50), density=True);
axes[0].set_xlabel(r"Number of RR Lyrae in TriAnd")
axes[0].set_ylabel(r"Probability")
axes[0].set_title("RR Lyrae data")
axes[0].axvline(109, linewidth=3., linestyle='dashed', color='#555555')
axes[0].text(104, 0.04, "Number of M-giants\n in TriAnd", fontsize=18, ha='right')
axes[0].set_yticklabels([])
[ll.set_fontsize(18) for ll in axes[0].xaxis.get_ticklabels()]

axes[1].hist((1-mock_sampler_1.flatchain)*179, bins=np.linspace(0., 179., 50), alpha=0.5, density=True, label='18 stars');
axes[1].hist((1-mock_sampler_2.flatchain)*179, bins=np.linspace(0., 179., 50), alpha=0.5, density=True, label='179 stars');
axes[1].set_xlabel(r"Number of RR Lyrae in TriAnd")
axes[1].legend(fontsize=18)
axes[1].set_title("Simulated data")
[ll.set_fontsize(18) for ll in axes[1].xaxis.get_ticklabels()]

# fig.savefig("/Users/adrian/projects/triand-rrlyrae/plots/ftriand.pdf")
Out[23]: