In [1]:
import numpy as np

# HeCS cluster catalog: a structured array providing (at least) the fields
# 'Ngal', 'vlos', 'MSZ', 'Mtot' — see the summary loop in the next cell.
# NOTE(review): removed a commented-out per-cluster print loop; the same
# quantities are computed (and kept) in the summary DataFrame built below.
dat = np.load('./data/HeCS_pared_small.npy')
In [16]:
import pandas as pd

# Summarize each cluster: line-of-sight velocity dispersion plus the two
# catalog masses. Build a list of records and construct the DataFrame once,
# instead of growing it row by row with .loc (quadratic and slower).
records = []
for n in range(len(dat)):
    Ngal = dat['Ngal'][n]  # number of member galaxies for cluster n
    records.append({
        'velsigma': np.std(dat['vlos'][n][0:Ngal]),  # LOS velocity std dev
        'M500_SZ': dat['MSZ'][n],                    # SZ-derived M500
        'M200c': dat['Mtot'][n],                     # catalog M200c
    })
datasum = pd.DataFrame(records, columns=['velsigma', 'M500_SZ', 'M200c'],
                       index=range(len(dat)))
In [3]:
## PURE POWER LAW
## PURE POWER LAW calibration (velocity dispersion -> mass)
alpha = 0.382
sig15 = 1244
## CONTAMINATED POWER LAW (alternative calibration, left disabled)
# alpha = 0.359
# sig15 = 753

# Invert the scaling relation sigma = sig15 * (M / 1e15)**alpha for the mass.
M_pow = 1.e15 * (datasum['velsigma'] / sig15) ** (1 / alpha)
M_pow.name = 'M_powlaw'

# Fractional error of the power-law mass relative to the catalog M200c.
ep_m200c = (M_pow - datasum['M200c']) / datasum['M200c']
ep_m200c.name = 'ep_M200c'

datacalc = pd.concat([M_pow, ep_m200c], axis=1)
In [4]:
# Merge the raw summary with the derived power-law columns (same index,
# so concatenating along columns is equivalent to the index join).
datares = pd.concat([datasum, datacalc], axis=1)
datares.head(15)
Out[4]:
In [17]:
import matplotlib as mpl
% matplotlib inline
mpl.rcParams['figure.figsize'] = [10.,10.]
plt = mpl.pyplot
In [18]:
def plothist(X, Y, n=10):
    """Overlay the binned mean of Y versus X with std-dev error bars.

    Parameters
    ----------
    X, Y : array-like (numpy array or pandas Series) of equal length.
    n    : number of equal-width bins spanning [X.min(), X.max()].

    FIX: the original advanced the bin edge by repeated float additions in a
    while-loop, which could produce n+1 means but only n x-positions (so
    plt.errorbar fails on the length mismatch), and the point at X.max()
    never fell inside any bin. Bins are now indexed 0..n-1 and the last bin
    is closed on the right so the maximum is counted.
    """
    lo = X.min()
    hi = X.max()
    increment = (hi - lo) / n
    y = []
    err = []
    for k in range(n):
        left = lo + k * increment
        right = left + increment
        if k == n - 1:
            in_bin = (X >= left) & (X <= right)  # include X.max() in last bin
        else:
            in_bin = (X >= left) & (X < right)
        y.append(Y[in_bin].mean())
        err.append(Y[in_bin].std())
    # x-positions are the bin centers.
    plt.errorbar(np.array(range(n)) * increment + lo + increment / 2.,
                 y,
                 err,
                 )
In [7]:
# Power-law mass vs. catalog mass, one point per cluster, with binned trend.
mass_true = datares['M200c'].astype('float64')
mass_pow = datares['M_powlaw'].astype('float64')
X = np.log10(mass_true)
Y = np.log10(mass_pow)
plt.plot(X, Y, '.')
plt.xlabel('log[ M200c ]', fontsize=20)
plt.ylabel('log[ M_powlaw ]', fontsize=20)
plothist(X, Y, n=10)
In [8]:
# Fractional mass error vs. catalog mass, with binned trend.
X = np.log10(datares['M200c'].astype('float64'))
Y = datares['ep_M200c'].astype('float64')
plt.plot(X, Y, '.')
plt.xlabel('log[ M200c ]', fontsize=20)
plt.ylabel(r'$\epsilon_{M200c}$', fontsize=20)
plothist(X, Y, n=10)
In [9]:
# Inspect one cluster: |v_los| histogram plus its summary quantities.
n = 20
Ngal = dat['Ngal'][n]
vlos = dat['vlos'][n][:Ngal]
h = plt.hist(np.abs(vlos), 12)
plt.xlabel(r'$v_{los}$', fontsize=20)
plt.ylabel(r'$dN/dv$', fontsize=20)
print('cluster index:', n)
print('velocity standard deviation:', np.std(vlos))
print ('cluster M500 from SZ:', dat['MSZ'][n])
print ('cluster M200c:', dat['Mtot'][n])
In [10]:
type(dat)
# NOTE(review): this loop duplicates the datasum-building cell above and
# re-populates the frame with identical values; consider deleting this cell.
for idx in range(len(dat)):
    ngal = dat['Ngal'][idx]
    datasum.loc[idx] = pd.Series({
        'velsigma': np.std(dat['vlos'][idx][:ngal]),
        'M500_SZ': dat['MSZ'][idx],
        'M200c': dat['Mtot'][idx],
    })
In [11]:
dat = np.load('./data/HeCS_pared_small.npy')
featuresList = []
massList = []
numHalos = len(dat)
# loop through all of the halos
for h in range(numHalos):
    numSubs = dat['Ngal'][h]
    # SDM requires at least 2 feature dimensions, so the 2nd column is a
    # dummy constant 1.0 alongside |v_los|.
    subfeat = np.zeros((numSubs, 2), 'f')
    # fill the features for all galaxies in this halo (vectorized over the
    # per-galaxy loop of the original; values are cast to float32 on assign)
    subfeat[:, 0] = np.abs(dat['vlos'][h][:numSubs])
    subfeat[:, 1] = 1.0
    # append the numpy array of features to the featuresList
    featuresList.append(subfeat)
    # BUG FIX: the original did massList.append(dat['Mtot']), appending the
    # WHOLE mass array once per halo; append only this halo's mass.
    massList.append(dat['Mtot'][h])
# turn this into a features object:
# features = sdm.Features(featuresList, mass=massList)
# model.crossvalidate is the workhorse. It will do the 10-fold crossvalidation to predict answers for all of your data. It outputs root mean squared error (rmse) and mass predictions:
# rmse, predictions = model.crossvalidate(trainData, trainData.mass)
In [19]:
# FIX: the saved object is a pickled dict inside a 0-d array, so modern
# numpy (>= 1.16.3) refuses to load it without allow_pickle=True.
# (np.load on untrusted pickles executes arbitrary code — only load files
# you produced yourself.)
tempres = np.load('./tempresults.npy', encoding='latin1',
                  allow_pickle=True).item()
tempres.keys()
Out[19]:
In [20]:
# SDM cross-validated mass predictions vs. true mass, with binned trend.
# X and Y are reused by a later cell, so the names are kept.
X = np.log10(tempres['mass']).astype('float64')
Y = np.log10(tempres['predictions']).astype('float64')
plt.plot(X, Y, '.')
plt.xlabel('log[ M200c ]', fontsize=20)
plt.ylabel('log[ M_SDM ]', fontsize=20)
plothist(X, Y, n=10)
In [14]:
# NOTE(review): missing call parentheses — this displays the bound method
# object, not a converted array; likely meant X.dtype or X.astype('float64').
X.astype
Out[14]:
In [4]:
# Scratch check: append to an array, then boolean-mask it.
x = np.array([4, 5, 6, 67])
x = np.append(x, 124)
small = x < 15
x[small]
Out[4]:
In [1]:
import bs4
In [ ]:
# FIX: './data/coma.p' is a pickled object, so np.load needs
# allow_pickle=True on modern numpy. (Pickle loading runs arbitrary code —
# only load trusted files.)
comadat = np.load('./data/coma.p', allow_pickle=True)[0]
In [ ]:
# Coma cluster: |v_los| histogram and power-law mass estimate.
# Each entry of comadat carries v_los in position 0.
vlos = [entry[0] for entry in comadat]
h = plt.hist(vlos, 15)
plt.xlabel(r'$v_{los}$', fontsize=20)
plt.ylabel(r'$dN/dv$', fontsize=20)
velsigma_coma = np.std(vlos)
M_pow_coma = 1.e15 * (velsigma_coma / sig15) ** (1 / alpha)
print ('cluster: COMA')
print ('velocity standard deviation:', np.std(vlos))
print ('mass from power law:', "%.4g" % M_pow_coma)
In [ ]: