In [5]:
from time import time
import numpy as np
# IPython parallel computing:
from IPython.parallel import Client
rc = Client()                   # connect a client to the running cluster
dview = rc.direct_view()        # direct view of all engines
dview.block = True              # wait until all engines have finished executing a command
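# NOTE (assumption, not in the original cell): Client() only connects to engines that
# are already running; start them first from a terminal, e.g. `ipcluster start -n 2`.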
# Multiprocessing module:
from multiprocessing import Pool
nproc = 2                       # number of worker processes; the Pool itself is created below,
                                # after throw_darts is defined, so the forked workers inherit it
def throw_darts(number_of_darts):
    """ Throw number_of_darts darts and count how many land inside the unit quarter-circle """
    from random import random           # random() is faster than random.uniform()
    num_darts = int(number_of_darts)    # number of darts to throw
    hits = 0                            # number of darts falling within the circle
    for n in xrange(num_darts):         # xrange avoids building the whole range in memory
        x, y = random(), random()
        if x*x + y*y <= 1:
            hits += 1
    return hits
pool = Pool(processes=nproc)            # create the pool after throw_darts so the forked workers inherit it
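# Sanity check (illustrative, not in the original): the fraction of hits approaches
# pi/4 ~= 0.785, so e.g. throw_darts(10**6) should return roughly 785,000 hits and
# 4.0*hits/1e6 should be close to 3.14.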
def serial_pi(n):
    """ Simple serial Monte Carlo estimation of pi, with n darts; returns the execution time """
    tic = time()                        # start time
    hits = throw_darts(n)
    toc = time()                        # end time
    pi_est = 4.0*(hits/float(n))        # pi estimate (only the timing is used in this study)
    return toc-tic
def IPcluster_pi(n):
    """ Parallel Monte Carlo estimation of pi using IPython's cluster, with n darts; returns the execution time """
    nnod = len(dview)                   # number of engines in the cluster
    tic = time()                        # start time
    hits = sum(dview.map(throw_darts, nnod*[int(n/nnod)]))   # split the darts evenly across the engines
    toc = time()                        # end time
    pi_est = 4.0*(hits/float(n))
    return toc-tic
def multiprocessing_pi(n):
    """ Parallel Monte Carlo estimation of pi using the multiprocessing module, with n darts; returns the execution time """
    tic = time()                        # start time
    hits = sum(pool.map(throw_darts, nproc*[int(n/nproc)]))  # split the darts evenly across the pool's workers
    toc = time()                        # end time
    pi_est = 4.0*(hits/float(n))
    return toc-tic
def darts_grid_search(dart_trials, repeats):
    """ Times the pi estimation with all 3 methods for each number of darts in the 'dart_trials' list.
        Returns an np array with columns [# darts thrown, serial_times, IPC_times, MP_times], where
        each value is the average over 'repeats' repeated trials. """
    trials = []
    print "Simulation completion:"
    for trial in xrange(repeats):
        print "{0:.1f}%...\t".format(100*(trial+1)/float(repeats)),   # progress indicator
        serial_times = []
        IPC_times = []
        MP_times = []
        for darts in dart_trials:
            serial_times.append(serial_pi(darts))
            IPC_times.append(IPcluster_pi(darts))
            MP_times.append(multiprocessing_pi(darts))
        trials.append(np.array([dart_trials, serial_times, IPC_times, MP_times]).T)
    return sum(trials)/float(len(trials))   # average over the number of repeats
##########################################################################################
######################## Plotting #################################
##########################################################################################
import matplotlib.pyplot as plt
%matplotlib inline
results = darts_grid_search(np.logspace(1,7,15), 15)
# Plot execution times
f1, ax1 = plt.subplots()
f1.set_size_inches(11.2,8.56) # size of figure
ax1.plot(results[:,0], results[:,1], c='red', lw=2, label='Simple')
ax1.plot(results[:,0], results[:,3], c='cyan', lw=2, label='Multiprocessing')
ax1.plot(results[:,0], results[:,2], c='green', lw=2, label='IPcluster')
ax1.set_yscale('log')
ax1.set_xscale('log')
# Plot simulation rates (darts per second) on a secondary y-axis
ax2 = ax1.twinx() # double axis
ax2.plot(results[:,0], results[:,0]/results[:,1], c='red', ls='--', lw=2, label='Simple')
ax2.plot(results[:,0], results[:,0]/results[:,3], c='cyan', ls='--', lw=2, label='Multiprocessing')
ax2.plot(results[:,0], results[:,0]/results[:,2], c='green', ls='--', lw=2, label='IPcluster')
ax2.set_yscale('log')
t = ax1.set_title('Parallel Processing Parametric Study', size=32, fontweight='bold', fontname='Times New Roman')
ax2.set_ylabel('Simulation Rate [darts/second], dashed line', size=20)
ax1.set_xlabel('# Darts Thrown', size=18)
ax1.set_ylabel('Execution Time [seconds], solid line', size=18)
t.set_y(1.02) # move title up a little
# Render and format legend
ax1.legend(loc="upper left")
#f1.tight_layout()
plt.savefig('parallel_processing.pdf') # Save the figure
plt.show()
In [ ]:
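# Optional cleanup (a minimal sketch, not part of the original study): release the
# multiprocessing workers and the cluster connection once the timing runs are done.
pool.close()    # stop accepting new tasks
pool.join()     # wait for the worker processes to exit
rc.close()      # close the client's connection to the IPython engines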