In [1]:

    
import sys



In [2]:

    
# Show the Python interpreter version this notebook was executed with.
sys.version









    Out[2]:





'3.6.7 (default, Oct 21 2018, 04:56:05) \n[GCC 5.4.0 20160609]'



In [3]:

    
import pandas as pd
import numpy as np
import swifter



In [4]:

    
# Show the pandas, NumPy and swifter versions used for the timings in this notebook.
pd.__version__, np.__version__,swifter.__version__









    Out[4]:





('0.25.1', '1.16.3', '0.292')

dataframe.apply VS series.apply VS swifter.apply



In [5]:

    
# Seed NumPy's global RNG so the random data generated afterwards is reproducible.
np.random.seed(42)



In [6]:

    
# Benchmark input: a single-column frame of 30,000,000 floats drawn with
# np.random.random (uniform on [0, 1)). The values depend on the state of
# NumPy's global RNG at the time this cell runs.
df1 = pd.DataFrame({
    'x': np.random.random(size=30000000)
})

apply



In [7]:

    
# Sanity check on the generated data: the mean of uniform [0, 1) samples
# should be close to 0.5.
df1['x'].mean()









    Out[7]:





0.5000156711783587

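vectorizable functions: winner is swifter series.apply (190 ms wall time), narrowly ahead of swifter dataframe.apply (284 ms) and plain dataframe.apply (548 ms); plain series.apply is far slower (6.37 s)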



In [14]:

    
def apply_to_array(arr):
    """Scale ``arr`` by 2 and then add 3, using NumPy ufuncs.

    Args:
        arr: A scalar or array-like value accepted by ``np.multiply``.

    Returns:
        The result of ``np.add(np.multiply(arr, 2), 3)``.
    """
    doubled = np.multiply(arr, 2)
    return np.add(doubled, 3)



In [15]:

    
def apply_to_element(elem):
    """Return ``elem * 2 + 3`` for a single value.

    Args:
        elem: A value supporting ``*`` and ``+`` with integers.

    Returns:
        ``elem`` multiplied by 2, then increased by 3.
    """
    doubled = elem * 2
    return doubled + 3



In [17]:

    
%%time
# dataframe.apply: apply_to_array is called with the whole 'x' column at once
df1[['x']].apply(apply_to_array)
True  # trailing literal; presumably keeps the large result from being rendered - confirm









    



CPU times: user 172 ms, sys: 376 ms, total: 548 ms
Wall time: 548 ms



In [18]:

    
%%time

# series.apply: apply_to_element is invoked once per value of the column
df1['x'].apply(apply_to_element)
True









    



CPU times: user 5.8 s, sys: 576 ms, total: 6.38 s
Wall time: 6.37 s



In [19]:

    
%%time

# swifter dataframe.apply: the array-level function, routed through the swifter accessor
df1[['x']].swifter.apply(apply_to_array)
True









    



CPU times: user 140 ms, sys: 148 ms, total: 288 ms
Wall time: 284 ms



In [21]:

    
%%time

# swifter series.apply: the element-level function, routed through the swifter accessor
df1['x'].swifter.apply(apply_to_element)
True









    



CPU times: user 72 ms, sys: 120 ms, total: 192 ms
Wall time: 190 ms

string functions: winner is regular series.apply (23.8 s wall time); swifter series.apply is much slower (2 min 17 s)



In [22]:

    
def num_to_str(num):
    """Return the ``str()`` representation of ``num``.

    Args:
        num: The value to convert.

    Returns:
        ``str(num)``.
    """
    as_text = str(num)
    return as_text



In [23]:

    
%%time

# series.apply: plain pandas baseline for the per-value string conversion
df1['x'].apply(num_to_str)
True









    



CPU times: user 22.8 s, sys: 952 ms, total: 23.8 s
Wall time: 23.8 s



In [24]:

    
%%time

# swifter series.apply: the string conversion routed through the swifter accessor
df1['x'].swifter.apply(num_to_str)
True









    





 
 










    



CPU times: user 1min 40s, sys: 5.66 s, total: 1min 45s
Wall time: 2min 17s

if-then-else: swifter series.apply (3.79 s wall time) beats plain series.map (4.59 s) by a small margin



In [25]:

    
def if_then_else(x):
    """Return ``True`` when ``x`` is at least 0.5, otherwise ``False``.

    Args:
        x: A scalar comparable with a float.

    Returns:
        The literal ``True`` or ``False`` depending on ``x >= 0.5``.
    """
    return True if x >= 0.5 else False



In [26]:

    
%%time

# series.map (note: not series.apply): if_then_else is called once per value
df1['x'].map(if_then_else)
True









    



CPU times: user 4.19 s, sys: 400 ms, total: 4.59 s
Wall time: 4.59 s



In [27]:

    
%%time

# swifter series.apply: the branching function routed through the swifter accessor
df1['x'].swifter.apply(if_then_else)
True









    





 
 










    



CPU times: user 1.06 s, sys: 356 ms, total: 1.42 s
Wall time: 3.79 s