How to Use the multiprocessing Library in Python

multiprocessing is a standard Python library for spawning processes using a simplar API to the threading module.


In [138]:
import math

def is_prime(number):
    """
    Is a number a prime number.
    """
    if number % 2 == 0 and number > 2:
        return (number, False)
    else:
        return (number, True)
    
# Let's do a quick test.
print(is_prime(3))
print(is_prime(4))


(3, True)
(4, False)

In [139]:
from multiprocessing import Pool

with Pool() as pool:
    results = pool.map(is_prime, range(80, 90))

In [140]:
for number, is_prime in results:
    if is_prime:
        print('{} is a prime number.'.format(number))


81 is a prime number.
83 is a prime number.
85 is a prime number.
87 is a prime number.
89 is a prime number.

In [146]:
import csv

with open('./data/dataOct-5-2017.csv') as fobj:
    reader = csv.DictReader(fobj)
    data = list(reader)

In [147]:
COMPANIES = [
    {'id': 893, 'name': 'Duls Volutpat Nunc'},
    {'id': 132, 'name': 'Elnm Assoc'},
    {'id': 524, 'name': 'Amet Lorem'}
]

In [148]:
from fuzzywuzzy import fuzz, process

def company_exists(person):
    choices = [{c['id']: c['name']} for c in COMPANIES]
    
    match, score = process.extractOne(person['company'],
                                      choices=choices,
                                      scorer=fuzz.token_sort_ratio)
    
    return (person, match, score)

company_exists({'company': 'Erat Inc.'})


Out[148]:
({'company': 'Erat Inc.'}, {132: 'Elnm Assoc'}, 27)

In [149]:
from multiprocessing import Pool

with Pool() as pool:
    results = pool.map(company_exists, data)

In [150]:
for person, match, score in results:
    if score > 70:
        print(person)


OrderedDict([('name', 'Chandler Miranda'), ('company', 'Duis Volutpat Nunc Ltd'), ('salary', '$45,845')])
OrderedDict([('name', 'Renee J. Jordan'), ('company', 'Amet Lorem Ltd'), ('salary', '$31,809')])

In [ ]: