In [ ]:
from __future__ import print_function, division
from exosyspop.populations import KeplerPowerLawBinaryPopulation
from exosyspop.survey import DetectionRamp

import logging
rootLogger = logging.getLogger()
rootLogger.setLevel(logging.INFO)

In [3]:
import sys
sys.path.append('..')

from simpleabc.simple_abc import Model, basic_abc, pmc_abc

import numpy as np
from scipy.stats import gaussian_kde, entropy, uniform
import logging

class ABCModel(Model):

    params = ('fB', 'beta', 'beta_a', 'beta_b') # Names of parameters
    summary_stat_names = ('period_pdf','N',
                          'phase_sec') # names of summary statistics
    distance_functions = ('d_period', 'd_N',
                          'd_fsec', 'd_phase') # names of different distance function methods 

    theta_0 = (0.14, -0.95, 0.8, 2.0)
    bounds = [(0,1), (-1.5,0), (0,5),(0,5)]
    prior = [uniform(0,1), uniform(-1.5, 1.5), uniform(0,5), uniform(0,5)]    
    
    
    def __init__(self, population, eff=None):
        self.population = population
        self.eff = eff

    def draw_theta(self):
        return [p.rvs() for p in self.prior]
        #theta = []
        #for p,(lo,hi) in zip(self.priors, self.bounds):
        #    if p=='uniform':
        #        theta.append(np.random.random()*(hi-lo) + lo)
        #return theta

    def generate_data(self, theta):
        param_dict = {p:v for p,v in zip(self.params, theta)}
        self.population.set_params(**param_dict)
        try:
            return self.population.observe(new=True, regr_trap=True).observe(self.eff)
        except KeyboardInterrupt:
            raise
        except:
            logging.warning('Error generating data: {}'.format(param_dict))

    @property
    def min_period(self):
        return self.population.params['period_min']
    
    @property
    def max_period(self):
        return self.population.params['period_max']

    def summary_stats(self, data):
        """Returns tuple containing summary statistics named in summary_stat_names
        """
        if data is None:
            return [None]*len(self.summary_stat_names)

        N = len(data)
        min_logP, max_logP = np.log(self.min_period), np.log(self.max_period)
        logP_grid = np.linspace(min_logP, max_logP, 1000)
        if N > 1:
            k = gaussian_kde(np.log(data.period.values))
            logP_pdf = k(logP_grid)
        else:
            logP_pdf = np.ones(len(logP_grid))*1./(max_logP - min_logP)

        phase_sec = data.phase_sec.dropna().values
            
        return logP_pdf, N, phase_sec

    def d_period(self, summary_stats, summary_stats_synth):
        p1 = summary_stats[0]
        p2 = summary_stats_synth[0]
        try:
            len(p1)
            len(p2)
        except:
            return np.inf
        kl_period = entropy(p1, p2)
        return kl_period
    
    def Ndist(self, N1, N2):
        if N1==0. or N2==0. or N1 is None or N2 is None:
            dist = 1
        else:
            dist = max(1 - 1.*N1/N2, 1-1*N2/N1)
        return dist
            
    def d_N(self, summary_stats, summary_stats_synth):
        N1 = summary_stats[1]
        N2 = summary_stats_synth[1]
        d = self.Ndist(N1, N2)
        #logging.info('{}, {}, {}'.format(N1, N2, d))
        return d 
        
    def d_fsec(self, summary_stats, summary_stats_synth):
        N1, phase_sec1 = summary_stats[1:3]
        N2, phase_sec2 = summary_stats_synth[1:3]
        
        f_sec1 = len(phase_sec1)/float(N1)
        f_sec2 = len(phase_sec2)/float(N2)
        
        return np.absolute(f_sec1 - f_sec2)

    def d_phase(self, summary_stats, summary_stats_synth, nbins=11):
        phase_sec1 = summary_stats[2]
        phase_sec2 = summary_stats_synth[2]

        try:
            len(phase_sec2)
        except:
            return np.inf
        
        if len(phase_sec1) < 2 or len(phase_sec2) < 2:
            return np.inf
        
        k1 = gaussian_kde(phase_sec1)
        k2 = gaussian_kde(phase_sec2)

        phs = np.linspace(0,1,100)
        pdf1 = k1(phs)
        pdf2 = k2(phs)
        
        return entropy(pdf1, pdf2)
        
    def null_distance_test(self, theta=None, N=100):
        if theta is None:
            theta = self.theta_0

        logging.info('Performing null distance test (N={})'.format(N))
        data1 = [self.generate_data(theta) for i in range(N)]
        data2 = [self.generate_data(theta) for i in range(N)]
        
        ds = []
        for dfn in self.distance_functions:
            fn = getattr(self, dfn)
            ds.append([fn(self.summary_stats(data1[i]),
                         self.summary_stats(data2[i])) for i in range(N)])
        
        null_stds = np.array([np.std(d) for d in ds])
        self._distance_norms = null_stds / null_stds[0]
        logging.info('Typical null distance = {}'.format(np.median(ds[0])*\
                                                        len(self.distance_functions)))
        return ds
        
    @property
    def distance_norms(self):
        if not hasattr(self, '_distance_norms'):
            self.null_distance_test()
            
        return self._distance_norms 


    def distance_function(self, stats, stats_synth):

        ds = []
        for dfn in self.distance_functions:
            fn = getattr(self, dfn)
            ds.append(fn(stats, stats_synth))

        return np.sum([d / self.distance_norms[i] for i,d in enumerate(ds)])

In [4]:
pop = KeplerPowerLawBinaryPopulation.load('plaw_pop')
pop.set_params(period_min=20, period_max=1200, beta=-0.95, fB=0.14)

eff = DetectionRamp(6,16)

data = pop.observe(new=True, regr_trap=True).observe(eff)

model = ABCModel(pop, eff)

In [4]:
import numpy as np
model._distance_norms = np.array([ 1.        ,  4.49241213,  2.60025772,  2.73734061])
model.distance_norms


Out[4]:
array([ 1.        ,  4.49241213,  2.60025772,  2.73734061])

In [6]:
pmc_posterior = pmc_abc(model, data, epsilon_0=0.5, min_samples=200, steps=20, verbose=True,
                       parallel=True, n_procs=4)


Starting step 0, epsilon=0.5
Running 50.0 particles on 4 processors
step took 1.03605414629 minutes.
Starting step 1, epsilon=0.412453051083
Running 50.0 particles on 4 processors
Process ABCProcess-10:
Process ABCProcess-11:
Process ABCProcess-12:
Process ABCProcess-1:
Process ABCProcess-9:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/tdm/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/home/tdm/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/home/tdm/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/home/tdm/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/home/tdm/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
    self.run()
    self.run()
    self.run()
    self.run()
  File "../simpleabc/simple_abc.py", line 24, in run
  File "../simpleabc/simple_abc.py", line 24, in run
  File "../simpleabc/simple_abc.py", line 24, in run
  File "../simpleabc/simple_abc.py", line 24, in run
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
  File "../simpleabc/simple_abc.py", line 599, in parallel_basic_abc
  File "../simpleabc/simple_abc.py", line 599, in parallel_basic_abc
  File "../simpleabc/simple_abc.py", line 599, in parallel_basic_abc
  File "../simpleabc/simple_abc.py", line 599, in parallel_basic_abc
    output.put(basic_abc(data, model, **kwds))
    output.put(basic_abc(data, model, **kwds))
    output.put(basic_abc(data, model, **kwds))
    output.put(basic_abc(data, model, **kwds))
  File "../simpleabc/simple_abc.py", line 242, in basic_abc
  File "../simpleabc/simple_abc.py", line 242, in basic_abc
  File "../simpleabc/simple_abc.py", line 242, in basic_abc
  File "../simpleabc/simple_abc.py", line 242, in basic_abc
    synthetic_data = model.generate_data(theta)
    synthetic_data = model.generate_data(theta)
    synthetic_data = model.generate_data(theta)
    synthetic_data = model.generate_data(theta)
  File "<ipython-input-2-fd2a3ba05e22>", line 39, in generate_data
  File "<ipython-input-2-fd2a3ba05e22>", line 39, in generate_data
  File "<ipython-input-2-fd2a3ba05e22>", line 39, in generate_data
  File "<ipython-input-2-fd2a3ba05e22>", line 39, in generate_data
    return self.population.observe(new=True, regr_trap=True).observe(self.eff)
    return self.population.observe(new=True, regr_trap=True).observe(self.eff)
    return self.population.observe(new=True, regr_trap=True).observe(self.eff)
  File "exosyspop/populations.py", line 880, in observe
  File "exosyspop/populations.py", line 879, in observe
  File "exosyspop/populations.py", line 712, in observe
    catalog['noise_sec'] = self.get_noise(catalog.host, catalog.T14_sec)
    catalog['noise_pri'] = self.get_noise(catalog.host, catalog.T14_pri)
    self._generate_binaries(use_ic=use_ic)
    return self.population.observe(new=True, regr_trap=True).observe(self.eff)
  File "exosyspop/populations.py", line 1314, in get_noise
  File "exosyspop/populations.py", line 1305, in get_noise
  File "exosyspop/populations.py", line 468, in _generate_binaries
    y = y0 + (y1 - y0)*(T - x0)/(x1 - x0)
    y0 = np.diag(s[col0])
    qR = self._qR_pipeline.predict(X)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/ops.py", line 572, in wrapper
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/frame.py", line 1963, in __getitem__
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/utils/metaestimators.py", line 37, in <lambda>
    def wrapper(left, right, name=name, na_op=na_op):
    return self._getitem_array(key)
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
KeyboardInterrupt
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/frame.py", line 2008, in _getitem_array
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/pipeline.py", line 204, in predict
    return self.steps[-1][-1].predict(Xt)
    return self.take(indexer, axis=1, convert=True)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 658, in predict
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/generic.py", line 1371, in take
    convert=True, verify=True)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 3628, in take
    axis=axis, allow_dups=True)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-6-8066295ee66a> in <module>()
      1 pmc_posterior = pmc_abc(model, data, epsilon_0=0.5, min_samples=200, steps=20, verbose=True,
----> 2                        parallel=True, n_procs=4)

/home/tdm/repositories/simpleabc/simple_abc.pyc in pmc_abc(model, data, epsilon_0, min_samples, steps, resume, parallel, n_procs, sample_only, verbose)
    442                     p.start()
    443                 for p in processes:
--> 444                     p.join()
    445                 results = [output.get() for p in processes]
    446 

/home/tdm/anaconda2/lib/python2.7/multiprocessing/process.pyc in join(self, timeout)
    143         assert self._parent_pid == os.getpid(), 'can only join a child process'
    144         assert self._popen is not None, 'can only join a started process'
--> 145         res = self._popen.wait(timeout)
    146         if res is not None:
    147             _current_process._children.discard(self)

/home/tdm/anaconda2/lib/python2.7/multiprocessing/forking.pyc in wait(self, timeout)
    152         def wait(self, timeout=None):
    153             if timeout is None:
--> 154                 return self.poll(0)
    155             deadline = time.time() + timeout
    156             delay = 0.0005

/home/tdm/anaconda2/lib/python2.7/multiprocessing/forking.pyc in poll(self, flag)
    133                 while True:
    134                     try:
--> 135                         pid, sts = os.waitpid(self.pid, flag)
    136                     except os.error as e:
    137                         if e.errno == errno.EINTR:

KeyboardInterrupt: 
  File "exosyspop/populations.py", line 712, in observe
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 3519, in reindex_indexer
  File "../simpleabc/simple_abc.py", line 24, in run
    for e in self.estimators_)
    return self.__class__(new_blocks, new_axes)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 2535, in __init__
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 800, in __call__
    self._generate_binaries(use_ic=use_ic)
    self._target(*self._args, **self._kwargs)
    self._rebuild_blknos_and_blklocs()
    while self.dispatch_one_batch(iterator):
  File "exosyspop/populations.py", line 496, in _generate_binaries
  File "../simpleabc/simple_abc.py", line 599, in parallel_basic_abc
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 2625, in _rebuild_blknos_and_blklocs
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 658, in dispatch_one_batch
    self._remove_prop(c)
    output.put(basic_abc(data, model, **kwds))
    if (new_blknos == -1).any():
    self._dispatch(tasks)
  File "exosyspop/populations.py", line 321, in _remove_prop
  File "../simpleabc/simple_abc.py", line 242, in basic_abc
KeyboardInterrupt
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 566, in _dispatch
    self.stars.loc[:, prop] = np.nan
    synthetic_data = model.generate_data(theta)
    job = ImmediateComputeBatch(batch)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/indexing.py", line 117, in __setitem__
  File "<ipython-input-2-fd2a3ba05e22>", line 39, in generate_data
    return self.population.observe(new=True, regr_trap=True).observe(self.eff)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 180, in __init__
    self._setitem_with_indexer(indexer, value)
  File "exosyspop/populations.py", line 880, in observe
    self.results = batch()
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/indexing.py", line 213, in _setitem_with_indexer
    catalog['noise_sec'] = self.get_noise(catalog.host, catalog.T14_sec)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 72, in __call__
    take_split_path = self.obj._is_mixed_type
  File "exosyspop/populations.py", line 1309, in get_noise
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/generic.py", line 2438, in _is_mixed_type
    y1 = np.diag(s[col1])
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 125, in _parallel_helper
    return self._protect_consolidate(f)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/frame.py", line 1963, in __getitem__
    return getattr(obj, methodname)(*args, **kwargs)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/generic.py", line 2402, in _protect_consolidate
    return self._getitem_array(key)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/sklearn/tree/tree.py", line 405, in predict
    result = f()
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/frame.py", line 2008, in _getitem_array
    proba = self.tree_.predict(X)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/generic.py", line 2437, in <lambda>
    return self.take(indexer, axis=1, convert=True)
KeyboardInterrupt
    f = lambda: self._data.is_mixed_type
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/generic.py", line 1371, in take
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 2936, in is_mixed_type
    convert=True, verify=True)
    self._consolidate_inplace()
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 3628, in take
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 3199, in _consolidate_inplace
    axis=axis, allow_dups=True)
    self.blocks = tuple(_consolidate(self.blocks))
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 3510, in reindex_indexer
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 4189, in _consolidate
    indexer, fill_tuple=(fill_value,))
    _can_consolidate=_can_consolidate)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 3591, in _slice_take_blocks_ax0
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 4212, in _merge_blocks
    new_mgr_locs=mgr_locs, fill_tuple=None))
    new_values = new_values[argsort]
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 984, in take_nd
KeyboardInterrupt
    allow_fill=False)
  File "/home/tdm/anaconda2/lib/python2.7/site-packages/pandas/core/common.py", line 787, in take_nd
    out = np.empty(out_shape, dtype=dtype)
KeyboardInterrupt

In [18]:
import numpy as np
pmc_posterior = np.load('scripts/pmc_500.npy')

In [20]:
%matplotlib inline
import matplotlib.pyplot as plt

i1 = 2
i2 = 3

theta_0 = model.theta_0

for i in range(pmc_posterior.shape[0]):
    plt.figure()
    plt.plot(pmc_posterior[i][0][i1,:], pmc_posterior[i][0][i2,:], '.');
    plt.title('epsilon = {}'.format(pmc_posterior[i]['epsilon']))
    plt.plot(theta_0[i1], theta_0[i2], 'rx', ms=20, lw=2)
    plt.xlim(*model.bounds[i1])
    plt.ylim(*model.bounds[i2])



In [21]:
import corner
corner.corner(pmc_posterior[-1][0].T, plot_contours=False, bins=10, 
              labels=['fB', 'beta', 'a', 'b'], truths=theta_0, alpha=1);



In [22]:
fig, axes = plt.subplots(4, 1, figsize=(10,10))

for i,ax in enumerate(axes):
    ax.hist(pmc_posterior[-1][0][i,:], histtype='step', lw=3)
    ax.axvline(theta_0[i], color='r', ls=':', lw=3)



In [25]:
from scipy.stats import beta
fig, ax = plt.subplots(1,1)
eccs = np.linspace(0,1,100)
for a,b in zip(pmc_posterior[i][0][2,:], pmc_posterior[i][0][3,:]):
    ax.plot(eccs, beta(a,b).pdf(eccs), color='k', alpha=0.02)
ax.plot(eccs, beta(0.8,2.0).pdf(eccs), color='r', alpha=1, lw=2);



In [ ]: