In [160]:
from __future__ import print_function, division
import numpy as np

# Create a big numpy array
N = int(1E7)
arr = np.random.randn(N)
print(arr[:5])
print('dtype=', arr.dtype)
print('MBytes=', N*8 / 1E6)


[ 0.1603029   0.36586917 -0.12065417  1.4464936  -2.65329948]
dtype= float64
MBytes= 80.0

In [161]:
def clip(data):
    return data.clip(0.4,0.5)

In [173]:
def chunk_by_chunk_clip(data):
    # n_chunks = 80 * 16 # 64KB fits into L1 = 16.2 ms per loop
    # n_chunks = 80 * 4 # 256KB fits into L2 = 1.54 ms per loop
    # n_chunks = 80 # 1MB = 0.367 ms
    n_chunks = 40 # 2MB = 0.345 ms
    # n_chunks = 90 // 3 # 3MB = 0.428 ms
    chunk_len = N // n_chunks
    # print('MBytes per chunk= {:.3f}MB'.format(chunk_len * 8 / 1E6))
    boundaries = np.arange(0, N, chunk_len, dtype=np.uint)
    for i in range(n_chunks-1):
        chunk = data[boundaries[i]:boundaries[i+1]]
        chunk = chunk.clip(0.4,0.5)
    return data

In [166]:
%timeit clip(arr)


10 loops, best of 3: 131 ms per loop

In [174]:
%timeit chunk_by_chunk_clip(arr)


10 loops, best of 3: 83.1 ms per loop
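
So chunking appears to win here, but note that chunk_by_chunk_clip isn't doing quite the same work as clip: the range(n_chunks - 1) loop never touches the final chunk, and each clipped chunk is thrown away rather than assembled into an 80 MB result, so part of the speed-up may simply be skipped work. A fairer comparison might clip each chunk in place. A minimal sketch (a hypothetical variant, not one of the functions timed above, and it mutates its input):

def chunk_by_chunk_clip_inplace(data, n_chunks=40):
    # Clip each chunk in place, writing through the view into `data`, so the
    # chunked version produces the same values as clip(data) and covers every
    # chunk, including the final one.
    chunk_len = int(len(data) // n_chunks)
    for start in range(0, len(data), chunk_len):
        chunk = data[start:start + chunk_len]
        np.clip(chunk, 0.4, 0.5, out=chunk)
    return data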

In [175]:
# OK, now let's try with pandas
import pandas as pd

series = pd.Series(arr)

In [176]:
%timeit clip(series)


1 loops, best of 3: 285 ms per loop

In [177]:
%timeit clip(series.values)


10 loops, best of 3: 138 ms per loop

In [178]:
%timeit chunk_by_chunk_clip(series)


1 loops, best of 3: 388 ms per loop

In [179]:
%timeit chunk_by_chunk_clip(series.values)


10 loops, best of 3: 83.1 ms per loop

In [180]:
# and let's try doing some pandas operations

def rolling_mean(data):
    return pd.rolling_mean(data, 30)

def chunk_by_chunk_rolling_mean(data):
    # n_chunks = 80 * 16 # 64KB fits into L1 = 16.2 ms per loop
    # n_chunks = 80 * 4 # 256KB fits into L2 = 1.54 ms per loop
    # n_chunks = 80 # 1MB = 0.367 ms
    n_chunks = 40 # 2MB = 0.345 ms
    # n_chunks = 90 // 3 # 3MB = 0.428 ms
    chunk_len = N // n_chunks
    # print('MBytes per chunk= {:.3f}MB'.format(chunk_len * 8 / 1E6))
    boundaries = np.arange(0, N, chunk_len, dtype=np.uint)
    for i in range(n_chunks-1):
        chunk = data.iloc[boundaries[i]:boundaries[i+1]]
        chunk = pd.rolling_mean(chunk, 30)
    return data
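
One caveat: a rolling mean computed chunk by chunk is not equivalent to a rolling mean over the whole series, because every chunk restarts the 30-sample window and so begins with 29 NaNs (and, as before, the per-chunk results are discarded anyway). If the chunked output were actually needed, consecutive chunks could overlap by window - 1 samples. A sketch of that idea, using the same pd.rolling_mean call as above (hypothetical helper, not timed here):

def chunk_by_chunk_rolling_mean_overlap(data, window=30, n_chunks=40):
    # Overlap each chunk with the previous (window - 1) samples so the rolling
    # mean at every chunk boundary sees the same history as the unchunked
    # computation, then drop the warm-up overlap before stitching the pieces
    # back together.
    chunk_len = int(len(data) // n_chunks)
    pieces = []
    for start in range(0, len(data), chunk_len):
        lower = max(0, start - (window - 1))
        chunk = data.iloc[lower:start + chunk_len]
        rolled = pd.rolling_mean(chunk, window)
        pieces.append(rolled.iloc[start - lower:])
    return pd.concat(pieces)

The result should match pd.rolling_mean(data, window) exactly, since every sample at a chunk boundary still sees its full 30-sample history.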

In [181]:
%timeit rolling_mean(series)


1 loops, best of 3: 237 ms per loop

In [182]:
%timeit chunk_by_chunk_rolling_mean(series)


10 loops, best of 3: 171 ms per loop

In [183]:
series32 = series.astype(np.float32)
%timeit chunk_by_chunk_rolling_mean(series32)
# but if we double the chunk size then we get 5 ms per loop


1 loops, best of 3: 175 ms per loop

In [184]:
# Let's try multiple ops per chunk

def rolling_mean_and_clip(data):
    data = pd.rolling_mean(data, 30)
    data = data.values.clip(0.4,0.5)
    return data

def chunk_by_chunk_rolling_mean_and_clip(data, n_chunks=40):
    # n_chunks = 80 * 16 # 64KB fits into L1 = 16.2 ms per loop
    # n_chunks = 80 * 4 # 256KB fits into L2 = 1.54 ms per loop
    # n_chunks = 80 # 1MB = 0.367 ms
    # n_chunks = 40 # 2MB = 0.345 ms
    # n_chunks = 90 // 3 # 3MB = 0.428 ms
    chunk_len = N // n_chunks
    # print('MBytes per chunk= {:.3f}MB'.format(chunk_len * 8 / 1E6))
    boundaries = np.arange(0, N, chunk_len, dtype=np.uint)
    for i in range(n_chunks-1):
        chunk = data.iloc[boundaries[i]:boundaries[i+1]]
        chunk = pd.rolling_mean(chunk, 30)
        chunk = chunk.values.clip(0.4,0.5)
    return data

In [185]:
%timeit rolling_mean_and_clip(series)


1 loops, best of 3: 346 ms per loop

In [186]:
%timeit chunk_by_chunk_rolling_mean_and_clip(series)


1 loops, best of 3: 223 ms per loop
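
If chunking did prove worthwhile, the near-identical chunk loops above could be collapsed into a single generic helper that picks the chunk length from a byte budget (so float32 and float64 data get equally sized chunks in bytes) and applies a whole pipeline of operations to each chunk while it is hot in cache. A rough sketch (hypothetical helper, not benchmarked; rolling operations would still need the overlap trick from the earlier sketch):

def process_in_chunks(values, funcs, chunk_bytes=2 * 1024 * 1024):
    # Apply a pipeline of array -> array functions to one cache-sized chunk
    # at a time. `values` is assumed to be a plain numpy array; `chunk_bytes`
    # is the target chunk size in bytes (2 MB here, matching the n_chunks=40
    # setting used above).
    chunk_len = max(1, chunk_bytes // values.itemsize)
    out = []
    for start in range(0, len(values), chunk_len):
        chunk = values[start:start + chunk_len]
        for func in funcs:
            chunk = func(chunk)
        out.append(chunk)
    return np.concatenate(out)

# e.g. process_in_chunks(series.values, [lambda c: c.clip(0.4, 0.5)])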

In [202]:
# Create plots of chunk size against time

import time
step = 100  # kBytes
max_kbytes = 50000  # kBytes (avoid shadowing the builtin max)
times = np.empty(max_kbytes // step)
for chunk_size in range(0, max_kbytes, step)[1:]:
    # chunk_size is in kBytes
    chunk_len = (chunk_size * 1000) // 8
    n_chunks = int(N // chunk_len)
    if n_chunks == 0:
        t0 = time.time()
        rolling_mean_and_clip(series)
    else:
        t0 = time.time()
        chunk_by_chunk_rolling_mean_and_clip(series, n_chunks)
        
    times[chunk_size//step] = time.time() - t0
    
times[0] = 0
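
Each point in this sweep is a single time.time() measurement, so the curve is likely to be noisy. Repeating each measurement and keeping the fastest run, as %timeit does, would give a smoother picture; a sketch along those lines (hypothetical, not what produced the plot below):

import timeit

def best_time(n_chunks, repeats=3):
    # Time the chunked rolling-mean-and-clip a few times for a given number
    # of chunks and keep the fastest run, to damp down one-off noise.
    timer = timeit.Timer(
        lambda: chunk_by_chunk_rolling_mean_and_clip(series, n_chunks))
    return min(timer.repeat(repeat=repeats, number=1))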

In [203]:
# plot, xlabel, ylabel etc. come from matplotlib's pylab namespace (pylab mode)
plot(range(0, max_kbytes, step)[1:], times[1:], label='chunked')
hold('on')
ylabel('time (seconds)')
xlabel('chunk size (kBytes)')

t0 = time.time()
rolling_mean_and_clip(series)
duration_for_unchunked = time.time() - t0
plot(xlim(), [duration_for_unchunked, duration_for_unchunked], label='unchunked')
legend()


Out[203]:
<matplotlib.legend.Legend at 0x5bbba10>

Conclusions

Yes, it can be faster to process data in chunks (my L3 cache is 3MB): the chunked clip took roughly 83 ms against 131 ms for the unchunked version, and the chunked rolling-mean-and-clip roughly 223 ms against 346 ms. I'm not entirely sure I trust these results, though, not least because the chunked functions discard their per-chunk results rather than assembling a full output. The bottom line is that I'm not going to bother splitting data into small (cache-sized) chunks for now; we can always add it later if we really want to.

