In [1]:
import sys
In [2]:
# Interpreter version string, displayed so the run environment is on record.
sys.version
Out[2]:
In [3]:
import pandas as pd
import numpy as np
import swifter
In [4]:
# Versions of pandas, NumPy and swifter that the timings in this notebook were taken with.
pd.__version__, np.__version__, swifter.__version__
Out[4]:
In [5]:
# Seed NumPy's global RNG so the random column generated for df1 is the same on every run.
np.random.seed(42)
In [6]:
# Benchmark input: a single float column 'x' holding 30 million draws from
# np.random.random (uniform on [0, 1)).
df1 = pd.DataFrame({
'x': np.random.random(size=30000000)
})
apply
In [7]:
# Sanity check on the generated column: uniform [0, 1) samples should average close to 0.5.
df1['x'].mean()
Out[7]:
vectorizable functions: winner is swifter series.apply, by a small margin
In [14]:
def apply_to_array(arr):
    """Array-level form of the benchmark transform ``2 * arr + 3``.

    Both steps go through NumPy ufuncs, so the whole input is processed in
    one call rather than element by element.

    Parameters
    ----------
    arr : array-like or scalar
        Values to transform; anything ``np.multiply`` accepts.

    Returns
    -------
    The result of ``np.add(np.multiply(arr, 2), 3)``.
    """
    doubled = np.multiply(arr, 2)
    return np.add(doubled, 3)
In [15]:
def apply_to_element(elem):
    """Scalar form of the benchmark transform: double ``elem``, then add 3.

    Parameters
    ----------
    elem : number
        A single value.

    Returns
    -------
    ``(elem * 2) + 3``.
    """
    doubled = elem * 2
    return doubled + 3
In [17]:
%%time
# Baseline: plain pandas DataFrame.apply on the one-column frame, using the
# array-level function apply_to_array.
df1[['x']].apply(apply_to_array)
True
In [18]:
%%time
# Baseline: plain pandas Series.apply with the scalar function apply_to_element.
df1['x'].apply(apply_to_element)
True
In [19]:
%%time
# Same one-column DataFrame.apply call as the pandas baseline, routed through
# the swifter accessor for comparison.
df1[['x']].swifter.apply(apply_to_array)
True
In [21]:
%%time
# Same Series.apply call as the pandas baseline, routed through the swifter
# accessor for comparison.
df1['x'].swifter.apply(apply_to_element)
True
string functions: winner is regular series.apply; swifter.apply fails miserably
In [22]:
def num_to_str(num):
    """Return the ``str()`` representation of ``num``.

    Serves as the string-producing function for the timing cells in this
    section.
    """
    text = str(num)
    return text
In [23]:
%%time
# Baseline: plain pandas Series.apply converting each value with num_to_str.
df1['x'].apply(num_to_str)
True
In [24]:
%%time
# Same num_to_str conversion, routed through the swifter accessor for comparison.
df1['x'].swifter.apply(num_to_str)
True
if-then-else: swifter wins by a small margin
In [25]:
def if_then_else(x):
    """Branching benchmark function: threshold a single value at 0.5.

    Parameters
    ----------
    x : number
        A single value.

    Returns
    -------
    bool
        ``True`` when ``x >= 0.5``, otherwise ``False`` (including when the
        comparison is false because ``x`` is NaN).
    """
    # The explicit conditional is kept on purpose: this function exists to
    # benchmark a branch, not a bare comparison.
    return True if x >= 0.5 else False
In [26]:
%%time
# series.apply
df1['x'].map(if_then_else)
True
In [27]:
%%time
# Same if_then_else thresholding, routed through the swifter accessor for comparison.
df1['x'].swifter.apply(if_then_else)
True