Michelle's Data (HeCS_pared_small.npy)


In [1]:
import numpy as np
dat = np.load('./data/HeCS_pared_small.npy')

# for n in range(len(dat)):
#     print('')
#     Ngal = dat['Ngal'][n]
#     print('cluster index:', n)
#     print('velocity standard deviation:', np.std(dat['vlos'][n][0:Ngal])) 
#     print ('cluster M500 from SZ:', dat['MSZ'][n])
#     print ('cluster M200c:', dat['Mtot'][n])
#     print ('  ')

In [16]:
import pandas as pd

datasum = pd.DataFrame(columns = ['velsigma','M500_SZ','M200c'], index = range(len(dat)))

for n in range(len(dat)):
    Ngal = dat['Ngal'][n]
    
    velsigma = np.std(dat['vlos'][n][0:Ngal])
    m500 = dat['MSZ'][n]
    m200c = dat['Mtot'][n]
    
    datasum.loc[n] = pd.Series({'velsigma':velsigma ,
                                'M500_SZ' : m500 ,
                                'M200c' : m200c })

In [3]:
## PURE POWER LAW
alpha = 0.382
sig15 = 1244

## CONTAMINATED POWER LAW
# alpha = 0.359
# sig15 = 753

M_pow = (datasum['velsigma']/sig15)**(1/alpha) * 1.e15
M_pow.name = 'M_powlaw'

ep_m200c = (M_pow - datasum['M200c'])/datasum['M200c']
ep_m200c.name = 'ep_M200c'

datacalc = pd.concat([M_pow,ep_m200c], axis=1)

In [4]:
datares = datasum.join(datacalc)

datares.head(15)


Out[4]:
velsigma M500_SZ M200c M_powlaw ep_M200c
0 843.07 2.324e+14 2.77e+14 3.61164e+14 0.30384
1 747.848 2.681e+14 2.92e+14 2.63905e+14 -0.0962168
2 426.237 1.141e+14 1.19e+14 6.0573e+13 -0.490983
3 753.334 3.43e+14 2.5e+14 2.69003e+14 0.0760117
4 690.516 2.338e+14 1.77e+14 2.14175e+14 0.210027
5 485.47 1.309e+14 2.02e+14 8.51558e+13 -0.578437
6 670.823 1.26e+14 1.34e+14 1.98552e+14 0.48173
7 706.867 2.8e+14 2.26e+14 2.27706e+14 0.00754944
8 750.748 3.437e+14 2.04e+14 2.66592e+14 0.306824
9 781.662 3.409e+14 4.04e+14 2.96295e+14 -0.266597
10 751.004 8.036e+14 4.42e+14 2.6683e+14 -0.396312
11 822.919 3.878e+14 1.47e+14 3.39e+14 1.30612
12 823.836 4.956e+14 7.84e+14 3.39991e+14 -0.566338
13 674.572 2.842e+14 3.52e+14 2.01469e+14 -0.427644
14 697.52 2.555e+14 2.37e+14 2.19908e+14 -0.0721177

In [17]:
import matplotlib as mpl
% matplotlib inline
mpl.rcParams['figure.figsize'] = [10.,10.]

plt = mpl.pyplot

In [18]:
def plothist(X,Y, n=10):
    increment = (X.max() - X.min())/n
    
    i = X.min()
    
    y = []
    err = []
    while i < X.max():
        y.append(Y[(X>=i) & (X<(i+increment))].mean())
        err.append(Y[(X>=i) & (X<(i+increment))].std())
        i += increment
        
    plt.errorbar(np.array(range(n))*increment + X.min() + increment/2.,
                 y,
                 err,
                )

In [7]:
X = np.log10(datares['M200c'].astype('float64'))
Y = np.log10(datares['M_powlaw'].astype('float64'))
plt.plot(X,Y,'.')
plt.xlabel('log[ M200c ]', fontsize=20)
plt.ylabel('log[ M_powlaw ]', fontsize=20)

plothist(X,Y,10)


---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-7-af17469f3d2c> in <module>()
      5 plt.ylabel('log[ M_powlaw ]', fontsize=20)
      6 
----> 7 plothist(X,Y,10)

<ipython-input-6-36d135fad7a0> in plothist(X, Y, n)
     13     plt.errorbar(np.array(range(n))*increment + X.min() + increment/2.,
     14                  y,
---> 15                  err,
     16                 )

/usr/lib64/python2.7/site-packages/matplotlib/pyplot.pyc in errorbar(x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, hold, **kwargs)
   2764                           barsabove=barsabove, lolims=lolims, uplims=uplims,
   2765                           xlolims=xlolims, xuplims=xuplims,
-> 2766                           errorevery=errorevery, capthick=capthick, **kwargs)
   2767         draw_if_interactive()
   2768     finally:

/usr/lib64/python2.7/site-packages/matplotlib/axes/_axes.pyc in errorbar(self, x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, **kwargs)
   2819             noylims = ~(lolims | uplims)
   2820             if noylims.any():
-> 2821                 xo, _ = xywhere(x, lower, noylims & everymask)
   2822                 lo, uo = xywhere(lower, upper, noylims & everymask)
   2823                 barcols.append(self.vlines(xo, lo, uo, **lines_kw))

/usr/lib64/python2.7/site-packages/matplotlib/axes/_axes.pyc in xywhere(xs, ys, mask)
   2720             ys are not arrays
   2721             """
-> 2722             assert len(xs) == len(ys)
   2723             assert len(xs) == len(mask)
   2724             xs = [thisx for thisx, b in zip(xs, mask) if b]

AssertionError: 

In [8]:
X = np.log10(datares['M200c'].astype('float64'))
Y = (datares['ep_M200c'].astype('float64'))
plt.plot(X,Y,'.')
plt.xlabel('log[ M200c ]', fontsize=20)
plt.ylabel(r'$\epsilon_{M200c}$', fontsize=20)

plothist(X,Y,10)


---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-8-12f2008d96b9> in <module>()
      5 plt.ylabel(r'$\epsilon_{M200c}$', fontsize=20)
      6 
----> 7 plothist(X,Y,10)

<ipython-input-6-36d135fad7a0> in plothist(X, Y, n)
     13     plt.errorbar(np.array(range(n))*increment + X.min() + increment/2.,
     14                  y,
---> 15                  err,
     16                 )

/usr/lib64/python2.7/site-packages/matplotlib/pyplot.pyc in errorbar(x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, hold, **kwargs)
   2764                           barsabove=barsabove, lolims=lolims, uplims=uplims,
   2765                           xlolims=xlolims, xuplims=xuplims,
-> 2766                           errorevery=errorevery, capthick=capthick, **kwargs)
   2767         draw_if_interactive()
   2768     finally:

/usr/lib64/python2.7/site-packages/matplotlib/axes/_axes.pyc in errorbar(self, x, y, yerr, xerr, fmt, ecolor, elinewidth, capsize, barsabove, lolims, uplims, xlolims, xuplims, errorevery, capthick, **kwargs)
   2819             noylims = ~(lolims | uplims)
   2820             if noylims.any():
-> 2821                 xo, _ = xywhere(x, lower, noylims & everymask)
   2822                 lo, uo = xywhere(lower, upper, noylims & everymask)
   2823                 barcols.append(self.vlines(xo, lo, uo, **lines_kw))

/usr/lib64/python2.7/site-packages/matplotlib/axes/_axes.pyc in xywhere(xs, ys, mask)
   2720             ys are not arrays
   2721             """
-> 2722             assert len(xs) == len(ys)
   2723             assert len(xs) == len(mask)
   2724             xs = [thisx for thisx, b in zip(xs, mask) if b]

AssertionError: 

In [9]:
n = 20

Ngal = dat['Ngal'][n]
vlos = dat['vlos'][n][0:Ngal]

h = plt.hist(np.absolute(vlos), 12)
plt.xlabel(r'$v_{los}$', fontsize=20)
plt.ylabel(r'$dN/dv$', fontsize=20)

print('cluster index:', n)
print('velocity standard deviation:', np.std(dat['vlos'][n][0:Ngal])) 
print ('cluster M500 from SZ:', dat['MSZ'][n])
print ('cluster M200c:', dat['Mtot'][n])


('cluster index:', 20)
('velocity standard deviation:', 690.93915)
('cluster M500 from SZ:', 2.9119999e+14)
('cluster M200c:', 4.1300003e+14)

Using HeCS to train SDM


In [10]:
type(dat)

for n in range(len(dat)):
    Ngal = dat['Ngal'][n]
    
    velsigma = np.std(dat['vlos'][n][0:Ngal])
    m500 = dat['MSZ'][n]
    m200c = dat['Mtot'][n]
    
    datasum.loc[n] = pd.Series({'velsigma':velsigma ,
                                'M500_SZ' : m500 ,
                                'M200c' : m200c })

In [11]:
dat = np.load('./data/HeCS_pared_small.npy')

featuresList = []
massList = []

numHalos = len(dat)

#loop through all of the halos
for h in range(numHalos):
        numSubs = dat['Ngal'][h]
        
        #make a numpy array that will hold, in this case, 3 features for each of the numSubs subhalos.  Be aware that SDM requires at least 2 dimensions, so if you want to use line of sight velocity, you'll have to fill in a dummy value for the 2nd dimension.  I used an array of 1.0's
        subfeat = np.zeros((numSubs, 2), 'f')
        
        #loop through the subhalos/galaxies in this halo:
        for sh in range (0, numSubs):
            #and fill in each of the features
            subfeat[sh][0] = np.abs(dat['vlos'][h][sh])
            subfeat[sh][1] = 1.0

        #append the numpy array of features to the featuresList
        featuresList.append(subfeat)
        #and the mass to the massList
        massList.append(dat['Mtot'])

#turn this into a features object:
# features = sdm.Features(featuresList, mass=massList)

#model.crossvalidate is the workhorse.  It will do the 10-fold crossvalidation to predict answers for all of your data.  It outputs root mean squared error (rmse) and mass predictions:
# rmse, predictions = model.crossvalidate(trainData, trainData.mass)


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-11-4ab7e5b79063> in <module>()
     16         for sh in range (0, numSubs):
     17             #and fill in each of the features
---> 18             subfeat[sh][0] = np.abs(dat['vlos'][h][sh])
     19             subfeat[sh][1] = 1.0
     20 

/usr/lib/python2.7/site-packages/numpy-1.10.1-py2.7-linux-x86_64.egg/numpy/core/_internal.pyc in _index_fields(ary, names)
    315     if isinstance(names, basestring):
    316         try:
--> 317             return ary.getfield(dt.fields[names][0], dt.fields[names][1])
    318         except KeyError:
    319             raise ValueError("no field of name %s" % names)

/usr/lib/python2.7/site-packages/numpy-1.10.1-py2.7-linux-x86_64.egg/numpy/core/_internal.pyc in _getfield_is_safe(oldtype, newtype, offset)
    445     """
    446     new_fields = _get_all_field_offsets(newtype, offset)
--> 447     old_fields = _get_all_field_offsets(oldtype)
    448     # raises if there is a problem
    449     _check_field_overlap(new_fields, old_fields)

/usr/lib/python2.7/site-packages/numpy-1.10.1-py2.7-linux-x86_64.egg/numpy/core/_internal.pyc in _get_all_field_offsets(dtype, base_offset)
    357             sub_dtype = dtype.fields[name][0]
    358             sub_offset = dtype.fields[name][1] + base_offset
--> 359             fields.extend(_get_all_field_offsets(sub_dtype, sub_offset))
    360     else:
    361         if dtype.shape:

/usr/lib/python2.7/site-packages/numpy-1.10.1-py2.7-linux-x86_64.egg/numpy/core/_internal.pyc in _get_all_field_offsets(dtype, base_offset)
    365                 count *= dim
    366             fields.extend((typ, off + dtype.base.itemsize*j)
--> 367                            for j in range(count) for (typ, off) in sub_offsets)
    368         else:
    369             fields.append((dtype, base_offset))

/usr/lib/python2.7/site-packages/numpy-1.10.1-py2.7-linux-x86_64.egg/numpy/core/_internal.pyc in <genexpr>((j,))
    365                 count *= dim
    366             fields.extend((typ, off + dtype.base.itemsize*j)
--> 367                            for j in range(count) for (typ, off) in sub_offsets)
    368         else:
    369             fields.append((dtype, base_offset))

KeyboardInterrupt: 

In [19]:
tempres = np.load('./tempresults.npy', encoding='latin1').item()
tempres.keys()


Out[19]:
['feat', 'mass', 'predictions', 'rmse']

In [20]:
X = np.log10(tempres['mass']).astype('float64')
Y = np.log10(tempres['predictions']).astype('float64')
plt.plot(X,Y,'.')
plt.xlabel('log[ M200c ]', fontsize=20)
plt.ylabel('log[ M_SDM ]', fontsize=20)

plothist(X,Y,10)



In [14]:
X.astype


Out[14]:
<function astype>

In [4]:
x = np.array([4,5,6,67])
x = np.append(x,124)

x[x<15]


Out[4]:
array([4, 5, 6])

In [1]:
import bs4

Minghan's Data (coma.p)


In [ ]:
comadat = np.load('./data/coma.p')[0]

In [ ]:
vlos = []
for i in comadat:
    vlos.append(i[0])
    
h = plt.hist(vlos, 15)
plt.xlabel(r'$v_{los}$', fontsize=20)
plt.ylabel(r'$dN/dv$', fontsize=20)

velsigma_coma = np.std(vlos)
M_pow_coma = (velsigma_coma/sig15)**(1/alpha) * 1.e15

print ('cluster: COMA')
print ('velocity standard deviation:', np.std(vlos))
print ('mass from power law:', "%.4g" % M_pow_coma)

In [ ]: