In [1]:
import numpy as np
import pandas as pd

from dfply import *
In [2]:
# Pipe the diamonds frame into head(5) to preview its first five rows.
diamonds >> head(5)
Out[2]:
In [3]:
# Pipe the diamonds frame into tail(3) to preview its last three rows.
diamonds >> tail(3)
Out[3]:
In [4]:
# Group by the 'cut' column first; the following head(2) is then applied to
# each group rather than to the frame as a whole.
diamonds >> groupby(X.cut) >> head(2)
Out[4]:
Ungrouping is performed with ungroup(). Operations prior to the ungrouping are done on groups.
In [5]:
# head(2) runs per 'cut' group; after ungroup() the final head(5) acts on the
# whole (ungrouped) result.
diamonds >> groupby(X.cut) >> head(2) >> ungroup() >> head(5)
Out[5]:
In [6]:
# Select columns using symbolic X.<column> references, then preview two rows.
diamonds >> select(X.carat, X.cut) >> head(2)
Out[6]:
In [7]:
# The selectors are mixed here: an integer (0), a symbolic reference (X.color)
# and a plain string ('depth') are passed to the same select() call.
diamonds >> select(0, X.color, 'depth') >> head(2)
Out[7]:
Arguments will be "flattened" in selection functions.
In [8]:
# Same selection as select(0, X.color, X.depth), but passed as arbitrarily
# nested lists to show that the arguments are flattened.
diamonds >> select(0, [X.color, [[X.depth]]]) >> head(2)
Out[8]:
Dropping columns uses `drop()`, which accepts the same kinds of arguments as `select()`:
In [9]:
# Drop columns given as integers, both bare (0, 1) and inside a list ([4, 5]).
# NOTE(review): the integers are presumably column positions — confirm.
diamonds >> drop(0,1,[4,5]) >> head(2)
Out[9]:
In [10]:
# Select by substring: presumably keeps the columns whose name contains 'c'.
diamonds >> select_containing('c') >> head(2)
Out[10]:
In [11]:
# Counterpart of select_containing: presumably removes the columns whose name
# contains 'c'.
diamonds >> drop_containing('c') >> head(2)
Out[11]:
In [12]:
# Select by prefix: presumably keeps the columns whose name starts with 'c'.
diamonds >> select_startswith('c') >> head(1)
Out[12]:
In [13]:
# Counterpart of select_startswith: presumably removes the columns whose name
# starts with 'c'.
diamonds >> drop_startswith('c') >> head(1)
Out[13]:
In [14]:
# Select by suffix: presumably keeps the columns whose name ends with 't'.
diamonds >> select_endswith('t') >> head(1)
Out[14]:
In [15]:
# Counterpart of select_endswith: presumably removes the columns whose name
# ends with 't'.
diamonds >> drop_endswith('t') >> head(1)
Out[15]:
In [16]:
# Select a range of columns bounded by 'depth' and 'price'; the bounds may be
# given symbolically or as strings, as mixed here.
# NOTE(review): whether the end column is included is not shown — confirm.
diamonds >> select_between(X.depth, 'price') >> head(1)
Out[16]:
In [17]:
# Select the leading columns up to column 'x'.
# NOTE(review): presumably 'x' itself is excluded here (compare with
# select_through) — confirm against the rendered output.
diamonds >> select_to(X.x) >> head(1)
Out[17]:
In [18]:
# Select the leading columns through column 'x'.
# NOTE(review): presumably 'x' itself is included here (compare with
# select_to) — confirm against the rendered output.
diamonds >> select_through(X.x) >> head(1)
Out[18]:
In [19]:
# Add a new column 'price_shift' computed as the 'price' column shifted by one
# row, then preview the result.
diamonds >> mutate(price_shift = X.price.shift(1)) >> head(5)
Out[19]:
Mutate can create multiple variables at once
In [20]:
# Each keyword argument defines one new column: 'price_shift' (price shifted
# by 1) and 'depth_shift2' (depth shifted by 2) are created in a single call.
diamonds >> mutate(price_shift = X.price.shift(1), depth_shift2 = X.depth.shift(2)) >> head(5)
Out[20]:
Mutate works with grouping
In [21]:
# With a preceding groupby, the shift is computed within each 'cut' group;
# head(2) then previews two rows per group.
diamonds >> groupby(X.cut) >> mutate(price_shift = X.price.shift(1)) >> head(2)
Out[21]:
In [22]:
# Compute two product columns from x, y and z.
# NOTE(review): unlike mutate, transmute presumably returns only the newly
# created columns — confirm against the rendered output.
diamonds >> transmute(x_times_y=X.x*X.y, y_times_z=X.y*X.z) >> head(5)
Out[22]:
In [23]:
# Grouped transmute: the shifted price is computed within each 'cut' group and
# head(5) is applied per group as well.
diamonds >> groupby(X.cut) >> transmute(price_shift=X.price.shift(1)) >> head(5)
Out[23]:
In [24]:
# Aggregate per 'cut' group: 'price_mean' is the mean price and 'price_first'
# is the first price value in the group. The parentheses only allow the pipe
# chain to be split across lines.
(diamonds >>
groupby(X.cut) >>
summarize(price_mean=np.mean(X.price),
price_first=X.price.values[0]) >>
head(2))
Out[24]:
In [25]:
# numpy supplies the aggregation functions passed to summarize_each below.
import numpy as np
# For each 'color' group, apply every function in the list (mean, std, var) to
# each of the listed columns (price, depth, x).
(diamonds >>
groupby(X.color) >>
summarize_each([np.mean, np.std, np.var], X.price, X.depth, X.x))
Out[25]:
In [26]:
# Draw 4 rows without replacement.
# NOTE(review): no seed/random_state is passed, so the rows shown presumably
# differ from run to run.
diamonds >> sample(n=4, replace=False)
Out[26]:
In [27]:
# Draw a fraction (0.0001) of the rows, with replacement.
# NOTE(review): no seed/random_state is passed, so the rows shown presumably
# differ from run to run.
diamonds >> sample(frac=0.0001, replace=True)
Out[27]:
In [28]:
# Keep only the 'depth' column, de-duplicate its rows with distinct(), and
# report the shape of the result. The original spelled the verb `distict`,
# a name the notebook never defines, so the cell would raise NameError.
(diamonds >> select('depth') >> distinct()).shape
Out[28]:
In [29]:
# Take rows by integer position [1, 7]; because of the preceding groupby the
# slice is presumably applied within each 'color' group.
diamonds >> groupby(X.color) >> row_slice([1,7])
Out[29]:
In [30]:
# Filter rows with a boolean condition: keep those whose cut equals 'Ideal'.
diamonds >> mask(X.cut == 'Ideal') >> head(4)
Out[30]:
In [31]:
# Several conditions can be passed as separate arguments.
# NOTE(review): they are presumably combined with logical AND — confirm.
diamonds >> mask(X.cut == 'Ideal', X.color == 'E', X.table < 55, X.price < 500)
Out[31]:
In [32]:
# Rename columns with new_name=old_column keyword pairs, using symbolic
# references for the existing columns.
diamonds >> rename(CUT=X.cut, CLARITY=X.clarity) >> head(2)
Out[32]:
In [33]:
# Same renaming form, but the existing column is given as a plain string.
diamonds >> rename(CuT='cut') >> head(2)
Out[33]:
In [34]:
# Sort the rows by 'table', then by 'price', and preview the first ten.
diamonds >> arrange(X.table, X.price) >> head(10)
Out[34]:
In [35]:
# Same sort keys, with ascending=False to reverse the sort order.
diamonds >> arrange(X.table, X.price, ascending=False) >> head(10)
Out[35]:
In [36]:
# Grouped sort: rows are presumably ordered by 'depth' within each 'cut'
# group, and head(2) previews two rows per group.
diamonds >> groupby(X.cut) >> arrange(X.depth) >> head(2)
Out[36]:
In [37]:
# Reshape wide -> long: the listed columns are gathered into a key column
# named 'variable' and a value column named 'value'.
diamonds >> gather('variable', 'value', ['price', 'depth','x','y','z']) >> head(5)
Out[37]:
In [38]:
# No column list is given here.
# NOTE(review): presumably every column is gathered in that case — confirm.
diamonds >> gather('variable', 'value') >> head(5)
Out[38]:
In [39]:
# Keep the long-format frame in `elongated` for the spread examples below.
# NOTE(review): add_id=True presumably adds a row-identifier column so the
# long frame can be spread back to one row per original row — confirm.
elongated = diamonds >> gather('variable', 'value', add_id=True)
elongated >> head(5)
Out[39]:
In [40]:
# Reshape long -> wide: values of 'variable' become columns filled from
# 'value'. The result is stored in `widened` and previewed.
widened = elongated >> spread(X.variable, X.value)
widened >> head(5)
Out[40]:
In [41]:
# Inspect the column dtypes of the spread result (no `convert` was requested).
widened.dtypes
Out[41]:
Passing `convert=True` makes `spread` attempt to convert the resulting columns to appropriate types.
In [42]:
# Same spread as before, but with convert=True; `widened` is rebound to the
# new result and previewed.
widened = elongated >> spread(X.variable, X.value, convert=True)
widened >> head(5)
Out[42]:
In [43]:
# Inspect the dtypes again, now for the frame spread with convert=True.
widened.dtypes
Out[43]:
In [44]:
# Two small example frames for the join demonstrations. They share the key
# column 'x1'; the keys 'A' and 'B' occur in both, 'C' only in `a` and 'D'
# only in `b`.
a = pd.DataFrame({'x1': ['A', 'B', 'C'], 'x2': [1, 2, 3]})
b = pd.DataFrame({'x1': ['A', 'B', 'D'], 'x3': [True, False, True]})
In [45]:
# Inner join of `a` with `b` on the shared key column 'x1'.
a >> inner_join(b, by='x1')
Out[45]:
In [46]:
# Outer join of `a` with `b` on the shared key column 'x1'.
a >> outer_join(b, by='x1')
Out[46]:
In [47]:
# Full join of `a` with `b` on 'x1'.
# NOTE(review): presumably equivalent to outer_join above — confirm.
a >> full_join(b, by='x1')
Out[47]:
In [48]:
# Left join: `a` is the left-hand frame, `b` the right-hand one, keyed on 'x1'.
a >> left_join(b, by='x1')
Out[48]:
In [49]:
# Right join: `a` is the left-hand frame, `b` the right-hand one, keyed on 'x1'.
a >> right_join(b, by='x1')
Out[49]:
In [ ]: