In [1]:
import threading
import time

a = ''

def task_1():
    global a
    for i in range(10):
        print('o', end='', flush=True)
        a += 'o'
        print(a)
        time.sleep(1)  # Blocking -> yield to other thread

def task_2():
    global a
    for i in range(20):
        print('O', end='', flush=True)
        a += 'O'
        print(a)
        time.sleep(0.6)  # Blocking -> yield to other thread

thread_1 = threading.Thread(target=task_1)
thread_2 = threading.Thread(target=task_2)
thread_1.start()
thread_2.start()
print("(Both threads have started)")
thread_1.join()  # Wait for thread_1 to finish
thread_2.join()
print("\nBoth threads have finished")
print(a)
Notice that both threads share the same process memory space.
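Sharing also means the threads can race: a += 'o' is a read-modify-write sequence, so concurrent updates may be lost. A minimal sketch of serializing the updates with a threading.Lock (our own refactoring; the parametrized task() and the name a_lock are not in the original example):

import threading
import time

a = ''
a_lock = threading.Lock()  # Hypothetical lock protecting 'a'

def task(symbol, iterations, delay):
    global a
    for i in range(iterations):
        with a_lock:  # Only one thread mutates 'a' at a time
            a += symbol
        print(symbol, end='', flush=True)
        time.sleep(delay)

thread_1 = threading.Thread(target=task, args=('o', 10, 1))
thread_2 = threading.Thread(target=task, args=('O', 20, 0.6))
thread_1.start()
thread_2.start()
thread_1.join()
thread_2.join()
print("\n" + a)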
In [2]:
# This code does not work!
import multiprocessing
import time

a = ''  # Does not work because each process has its own 'a'

def task_1():
    global a
    for i in range(10):
        print('o', end='', flush=True)
        a += 'o'
        print(a)
        time.sleep(1)  # Blocking -> yield to other process

def task_2():
    global a
    for i in range(20):
        print('O', end='', flush=True)
        a += 'O'
        print(a)
        time.sleep(0.6)  # Blocking -> yield to other process

process_1 = multiprocessing.Process(target=task_1)
process_2 = multiprocessing.Process(target=task_2)
process_1.start()
process_2.start()
print("(Both processes have started)")
process_1.join()
process_2.join()
print("\nBoth processes have finished")
print(a)
By definition, a process must fork (make a copy of itself, that is, of its code and its memory) before it starts running. In the previous example, the Python interpreter forks twice and the two children run in parallel while the parent process waits for their completion. Neither the child processes nor the parent process share their global state (where a is defined).
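A quick way to see this isolation (a sketch we add here; it assumes a Unix-like platform where the default start method is fork) is to let a child modify the global and then compare with the parent's copy:

import multiprocessing
import os

a = 'parent'

def child():
    global a
    a += ' + child'        # Modifies the child's private copy only
    print(os.getpid(), a)  # -> '<child pid> parent + child'

p = multiprocessing.Process(target=child)
p.start()
p.join()
print(os.getpid(), a)      # -> '<parent pid> parent' (unchanged)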
There are several options for sharing data between processes despite this isolation. One of them is to use a shared-memory Manager():
In [3]:
import multiprocessing
import time
import ctypes

def task_1(a):
    for i in range(10):
        print('o', end='', flush=True)
        a.value += 'o'
        time.sleep(1)  # Blocking -> yield to other process

def task_2(a):
    for i in range(20):
        print('O', end='', flush=True)
        a.value += 'O'
        time.sleep(0.6)  # Blocking -> yield to other process

manager = multiprocessing.Manager()
# See https://docs.python.org/3/library/ctypes.html#module-ctypes
a = manager.Value(ctypes.c_char_p, "")
process_1 = multiprocessing.Process(target=task_1, args=(a,))
process_2 = multiprocessing.Process(target=task_2, args=(a,))
process_1.start()
process_2.start()
print("(Both processes have started)")
process_1.join()
process_2.join()
print("\nBoth processes have finished")
print(a.value)
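Another of those options (sketched here; the original notebook only shows the Manager(), and the parametrized task() is our own) is to avoid sharing altogether and let each child send its partial result back through a multiprocessing.Queue:

import multiprocessing
import time

def task(symbol, iterations, delay, queue):
    partial = ''
    for i in range(iterations):
        print(symbol, end='', flush=True)
        partial += symbol
        time.sleep(delay)
    queue.put(partial)  # Send the partial result back to the parent

queue = multiprocessing.Queue()
process_1 = multiprocessing.Process(target=task, args=('o', 10, 1, queue))
process_2 = multiprocessing.Process(target=task, args=('O', 20, 0.6, queue))
process_1.start()
process_2.start()
a = queue.get() + queue.get()  # Collect both partial results (arrival order)
process_1.join()
process_2.join()
print("\n" + a)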
In [21]:
import time

a = ''

def task_1():
    global a
    for i in range(20):
        print('o', end='', flush=True)
        a += 'o'
        yield  # Return control to the caller
        time.sleep(0.5)

def task_2():
    global a
    for i in range(20):
        print('O', end='', flush=True)
        a += 'O'
        yield  # Return control to the caller
        time.sleep(0.25)

t1 = task_1()
t2 = task_2()
now = time.perf_counter()  # Real time (not only user time)
for i in range(20):
    next(t1)  # Resume task_1 until its next yield
    next(t2)  # Resume task_2 until its next yield
elapsed = time.perf_counter() - now
print(f"\nelapsed {elapsed:0.2f} seconds.")
print(20*0.5 + 20*0.25)  # Total time spent sleeping
print("a =", a)
In this case, the coroutines yield control to the "event loop" (also called the "coordinator") with an await statement, and the loop then picks the next ready coroutine from a round-robin list and resumes it. Because the awaited sleeps overlap instead of blocking the whole program (as time.sleep() does in the generator version above), the wall time of this version is smaller than in the previous case.
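To make the coordinator idea concrete, here is a minimal sketch (our own toy scheduler, not asyncio's actual implementation) of a round-robin loop that drives the generator-based tasks defined in the previous cell:

from collections import deque

def run_round_robin(*tasks):
    ready = deque(tasks)        # Round-robin list of ready coroutines
    while ready:
        task = ready.popleft()  # Pick the next ready coroutine
        try:
            next(task)          # Resume it until its next yield
            ready.append(task)  # Still running -> re-enqueue it
        except StopIteration:
            pass                # Finished -> drop it

run_round_robin(task_1(), task_2())  # Uses the generators defined above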
In [3]:
import asyncio
import time

a = ''

async def task_1():
    global a
    for i in range(20):
        print('o', end='', flush=True)
        a += 'o'
        await asyncio.sleep(0.5)  # Non-blocking -> yield to the event loop

async def task_2():
    global a
    for i in range(20):
        print('O', end='', flush=True)
        a += 'O'
        await asyncio.sleep(0.25)  # Non-blocking -> yield to the event loop

now = time.perf_counter()
await asyncio.gather(task_1(), task_2())  # Top-level await works in a notebook
elapsed = time.perf_counter() - now
print(f"\nelapsed {elapsed:0.2f} seconds.")
print("a =", a)