In [1]:
import sys

In [2]:
sys.version


Out[2]:
'3.6.7 (default, Oct 21 2018, 04:56:05) \n[GCC 5.4.0 20160609]'

In [3]:
import pandas as pd
import numpy as np
import swifter

In [4]:
pd.__version__, np.__version__,swifter.__version__


Out[4]:
('0.25.1', '1.16.3', '0.292')

DataFrame.apply vs. Series.apply vs. swifter.apply


In [5]:
np.random.seed(42)

In [6]:
# Benchmark input: a single column 'x' holding 30 million samples.
# np.random.random draws floats from the half-open interval [0.0, 1.0), so the
# exact values depend on the seed set in the previous cell.
df1 = pd.DataFrame({
    'x': np.random.random(size=30000000)
})

apply


In [7]:
df1['x'].mean()


Out[7]:
0.5000156711783587

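Vectorizable functions: swifter's series.apply is fastest (190 ms wall time), narrowly ahead of swifter's dataframe.apply (284 ms) and plain dataframe.apply (548 ms), and far ahead of plain series.apply (6.37 s)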


In [14]:
def apply_to_array(arr):
    """Return ``arr * 2 + 3`` computed element-wise with NumPy ufuncs.

    ``arr`` is passed straight to ``np.multiply`` and the product to
    ``np.add``, so anything those ufuncs accept (array, Series, list,
    scalar) is handled in one call.
    """
    doubled = np.multiply(arr, 2)
    return np.add(doubled, 3)

In [15]:
def apply_to_element(elem):
    """Return ``elem * 2 + 3`` for one value.

    Same arithmetic as ``apply_to_array``, but written with plain Python
    operators.
    """
    doubled = elem * 2
    return doubled + 3

In [17]:
%%time
# dataframe.apply: apply_to_array is handed the 'x' column of the one-column frame as a whole
df1[['x']].apply(apply_to_array)
True


CPU times: user 172 ms, sys: 376 ms, total: 548 ms
Wall time: 548 ms

In [18]:
%%time

# series.apply: apply_to_element is invoked on the individual values of 'x'
df1['x'].apply(apply_to_element)
True


CPU times: user 5.8 s, sys: 576 ms, total: 6.38 s
Wall time: 6.37 s

In [19]:
%%time

# swifter dataframe.apply: same one-column frame and function as the plain dataframe.apply cell
df1[['x']].swifter.apply(apply_to_array)
True


CPU times: user 140 ms, sys: 148 ms, total: 288 ms
Wall time: 284 ms

In [21]:
%%time

# swifter series.apply: same series and per-element function as the plain series.apply cell
# NOTE(review): the recorded timing suggests swifter evaluated the function on the
# whole column at once rather than value by value — confirm against swifter's docs
df1['x'].swifter.apply(apply_to_element)
True


CPU times: user 72 ms, sys: 120 ms, total: 192 ms
Wall time: 190 ms

String functions: plain series.apply wins (23.8 s wall time); swifter.apply finishes but is almost six times slower (2 min 17 s)


In [22]:
def num_to_str(num):
    """Return the ``str()`` representation of ``num``."""
    text = str(num)
    return text

In [23]:
%%time

# series.apply: baseline for the string conversion (num_to_str on the values of 'x')
df1['x'].apply(num_to_str)
True


CPU times: user 22.8 s, sys: 952 ms, total: 23.8 s
Wall time: 23.8 s

In [24]:
%%time

# swifter series.apply: same series and string conversion as the baseline cell
df1['x'].swifter.apply(num_to_str)
True


CPU times: user 1min 40s, sys: 5.66 s, total: 1min 45s
Wall time: 2min 17s

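If-then-else: swifter wins by a small margin (3.79 s vs. 4.59 s wall time); note that the baseline cell calls series.map rather than series.apply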


In [25]:
def if_then_else(x):
    """Return ``True`` when ``x >= 0.5``, otherwise ``False``.

    The comparison result is used as a branch condition, so its truth value
    is evaluated and the return value is always one of the two literals.
    """
    return True if x >= 0.5 else False

In [26]:
%%time

# series.map (not series.apply): baseline for the branching function on the values of 'x'
df1['x'].map(if_then_else)
True


CPU times: user 4.19 s, sys: 400 ms, total: 4.59 s
Wall time: 4.59 s

In [27]:
%%time

# swifter series.apply: same series and branching function as the baseline cell
df1['x'].swifter.apply(if_then_else)
True


CPU times: user 1.06 s, sys: 356 ms, total: 1.42 s
Wall time: 3.79 s