In [4]:
#!/usr/bin/python
# File: sum_primes.py
# Author: VItalii Vanovschi
# Desc: This program demonstrates parallel computations with pp module
# It calculates the sum of prime numbers below a given integer in parallel
# Parallel Python Software: http://www.parallelpython.com
import math, sys, time
import pp
def isprime(n):
"""Returns True if n is prime and False otherwise"""
if not isinstance(n, int):
raise TypeError("argument passed to is_prime is not of 'int' type")
if n < 2:
return False
if n == 2:
return True
max = int(math.ceil(math.sqrt(n)))
i = 2
while i <= max:
if n % i == 0:
return False
i += 1
return True
def sum_primes(n):
"""Calculates sum of all primes below given integer n"""
return sum([x for x in xrange(2,n) if isprime(x)])
print """Usage: python sum_primes.py [ncpus]
[ncpus] - the number of workers to run in parallel,
if omitted it will be set to the number of processors in the system
"""
# tuple of all parallel python servers to connect with
ppservers = ("localhost",)
#ppservers = ("10.0.0.1",)
if len(sys.argv) > 1:
ncpus = int(sys.argv[1])
# Creates jobserver with ncpus workers
job_server = pp.Server(ncpus, ppservers=ppservers)
else:
# Creates jobserver with automatically detected number of workers
job_server = pp.Server(ppservers=ppservers)
print "Starting pp with", job_server.get_ncpus(), "workers"
# Submit a job of calulating sum_primes(100) for execution.
# sum_primes - the function
# (100,) - tuple with arguments for sum_primes
# (isprime,) - tuple with functions on which function sum_primes depends
# ("math",) - tuple with module names which must be imported before sum_primes execution
# Execution starts as soon as one of the workers will become available
job1 = job_server.submit(sum_primes, (100,), (isprime,), ("math",))
# Retrieves the result calculated by job1
# The value of job1() is the same as sum_primes(100)
# If the job has not been finished yet, execution will wait here until result is available
result = job1()
print "Sum of primes below 100 is", result
start_time = time.time()
# The following submits 8 jobs and then retrieves the results
inputs = (100000, 100100, 100200, 100300, 100400, 100500, 100600, 100700)
jobs = [(input, job_server.submit(sum_primes,(input,), (isprime,), ("math",))) for input in inputs]
for input, job in jobs:
print "Sum of primes below", input, "is", job()
print "Time elapsed: ", time.time() - start_time, "s"
job_server.print_stats()
# Parallel Python Software: http://www.parallelpython.com
In [ ]:
starttime = time.time()
bins=bayesian_blocks(hmqdat['Z'])
print bins
endtime = time.time()
tottime = endtime-starttime
print "total time taken"
print tottime
In [ ]:
job1 = job_server.submit(sum_primes, (100,), (isprime,), ("math",))
In [5]:
import math, sys, time
import pp
import numpy as np
from scipy import stats
import pylab as pl
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.constants import c
import matplotlib.pyplot as plt
import math
import scipy.special as sp
from scipy import integrate
%pylab inline
%matplotlib inline
import time
In [6]:
hmqdat=ascii.read("./hmqdata_sorted_full.csv")
In [ ]:
def bayesian_blocks(t):
"""Bayesian Blocks Implementation
By Jake Vanderplas. License: BSD
Based on algorithm outlined in http://adsabs.harvard.edu/abs/2012arXiv1207.5578S
Parameters
----------
t : ndarray, length N
data to be histogrammed
Returns
-------
bins : ndarray
array containing the (N+1) bin edges
Notes
-----
This is an incomplete implementation: it may fail for some
datasets. Alternate fitness functions and prior forms can
be found in the paper listed above.
"""
# copy and sort the array
t = np.sort(t)
N = t.size
# create length-(N + 1) array of cell edges
edges = np.concatenate([t[:1],
0.5 * (t[1:] + t[:-1]),
t[-1:]])
block_length = t[-1] - edges
# arrays needed for the iteration
nn_vec = np.ones(N)
best = np.zeros(N, dtype=float)
last = np.zeros(N, dtype=int)
#-----------------------------------------------------------------
# Start with first data cell; add one cell at each iteration
#-----------------------------------------------------------------
for K in range(N):
# Compute the width and count of the final bin for all possible
# locations of the K^th changepoint
width = block_length[:K + 1] - block_length[K + 1]
count_vec = np.cumsum(nn_vec[:K + 1][::-1])[::-1]
# evaluate fitness function for these possibilities
fit_vec = count_vec * (np.log(count_vec) - np.log(width))
fit_vec -= 4 # 4 comes from the prior on the number of changepoints
fit_vec[1:] += best[:K]
# find the max of the fitness: this is the K^th changepoint
i_max = np.argmax(fit_vec)
last[K] = i_max
best[K] = fit_vec[i_max]
#-----------------------------------------------------------------
# Recover changepoints by iteratively peeling off the last block
#-----------------------------------------------------------------
change_points = np.zeros(N, dtype=int)
i_cp = N
ind = N
while True:
i_cp -= 1
change_points[i_cp] = ind
if ind == 0:
break
ind = last[ind - 1]
change_points = change_points[i_cp:]
print edges[change_points]
return edges[change_points]
In [ ]:
ppservers = ("198.168.1.1",)
#ppservers = ("10.0.0.1",)
if len(sys.argv) > 1:
#ncpus = int(float(sys.argv[1]))
# Creates jobserver with ncpus workers
job_server = pp.Server(ppservers=ppservers)
else:
# Creates jobserver with automatically detected number of workers
job_server = pp.Server(ppservers=ppservers)
print "Starting pp with", job_server.get_ncpus(), "workers"
# Submit a job of calulating sum_primes(100) for execution.
# sum_primes - the function
# (100,) - tuple with arguments for sum_primes
# (isprime,) - tuple with functions on which function sum_primes depends
# ("math",) - tuple with module names which must be imported before sum_primes execution
# Execution starts as soon as one of the workers will become available
#job1 = job_server.submit(bayesian_blocks, (100,), (isprime,), ("math",))
# Retrieves the result calculated by job1
# The value of job1() is the same as sum_primes(100)
# If the job has not been finished yet, execution will wait here until result is available
#result = job1()
#print "Sum of primes below 100 is", result
start_time = time.time()
# The following submits 8 jobs and then retrieves the results
inputs = hmqdat['Z']
jobs = [(input, job_server.submit(bayesian_blocks,(input,),("np",))) for input in inputs]
for input, job in jobs:
print "Sum of primes below", input, "is", job()
print "Time elapsed: ", time.time() - start_time, "s"
job_server.print_stats()
In [10]:
int(10.5)
Out[10]:
In [14]:
print sys.argv[1]
In [15]:
sys.argv
Out[15]:
In [ ]: