A vaex DataFrame can be lazily converted to a dask array (`dask.array`) using `DataFrame.to_dask_array`.
In [2]:
import vaex
# vaex.example() returns a sample dataframe; the output below shows
# 330,000 rows with columns x, y, z, vx, vy, vz, E, L, Lz and FeH
df = vaex.example()
# bare last expression so the notebook displays the dataframe
df
Out[2]:
# x y z vx vy vz E L Lz FeH
0 -0.777470767 2.10626292 1.93743467 53.276722 288.386047 -95.2649078 -121238.171875 831.0799560546875 -336.426513671875 -2.309227609164518
1 3.77427316 2.23387194 3.76209331 252.810791 -69.9498444 -56.3121033 -100819.9140625 1435.1839599609375 -828.7567749023438 -1.788735491591229
2 1.3757627 -6.3283844 2.63250017 96.276474 226.440201 -34.7527161 -100559.9609375 1039.2989501953125 920.802490234375 -0.7618109022478798
3 -7.06737804 1.31737781 -6.10543537 204.968842 -205.679016 -58.9777031 -70174.8515625 2441.724853515625 1183.5899658203125 -1.5208778422936413
4 0.243441463 -0.822781682 -0.206593871 -311.742371 -238.41217 186.824127 -144138.75 374.8164367675781 -314.5353088378906 -2.655341358427361
... ... ... ... ... ... ... ... ... ... ...
329,995 3.76883793 4.66251659 -4.42904139 107.432999 -2.13771296 17.5130272 -119687.3203125 746.8833618164062 -508.96484375 -1.6499842518381402
329,996 9.17409325 -8.87091351 -8.61707687 32.0 108.089264 179.060638 -68933.8046875 2395.633056640625 1275.490234375 -1.4336036247720836
329,997 -1.14041007 -8.4957695 2.25749826 8.46711349 -38.2765236 -127.541473 -112580.359375 1182.436279296875 115.58557891845703 -1.9306227597361942
329,998 -14.2985935 -5.51750422 -8.65472317 110.221558 -31.3925591 86.2726822 -74862.90625 1324.5926513671875 1057.017333984375 -1.225019818838568
329,999 10.5450506 -8.86106777 -4.65835428 -2.10541415 -27.6108856 3.80799961 -95361.765625 351.0955505371094 -309.81439208984375 -2.5689636894079477
In [10]:
# select the x, y and z columns of the dataframe and convert them to a
# single 2d dask array (shape (330000, 3) in the output below)
A = df[['x', 'y', 'z']].to_dask_array()
# bare last expression so the notebook displays the dask array summary
A
Out[10]:
Array Chunk
Bytes 7.92 MB 7.92 MB
Shape (330000, 3) (330000, 3)
Count 2 Tasks 1 Chunks
Type float64 numpy.ndarray
In [11]:
import dask.array as da
# Build the row-wise Euclidean norm sqrt(x^2 + y^2 + z^2) as a dask
# expression; nothing is evaluated until .compute() is called.
x, y, z = A[:, 0], A[:, 1], A[:, 2]  # the three columns of the 2d array
r = da.sqrt(x**2 + y**2 + z**2)
# bare last expression so the notebook displays the dask array summary
r
Out[11]:
Array Chunk
Bytes 2.64 MB 2.64 MB
Shape (330000,) (330000,)
Count 11 Tasks 1 Chunks
Type float64 numpy.ndarray
In [12]:
# materialize the data: compute() evaluates the lazy dask expression `r`
# and returns the concrete values
r_computed = r.compute()
# bare last expression so the notebook displays the computed values
r_computed
In [15]:
# put it back in the dataframe: store the computed values as a new
# column named 'r' (it appears as the last column in the output below)
df['r'] = r_computed
# bare last expression so the notebook displays the updated dataframe
df
Out[15]:
# x y z vx vy vz E L Lz FeH r
0 -0.777470767 2.10626292 1.93743467 53.276722 288.386047 -95.2649078 -121238.171875 831.0799560546875 -336.426513671875 -2.309227609164518 2.9655450396553587
1 3.77427316 2.23387194 3.76209331 252.810791 -69.9498444 -56.3121033 -100819.9140625 1435.1839599609375 -828.7567749023438 -1.788735491591229 5.77829281049018
2 1.3757627 -6.3283844 2.63250017 96.276474 226.440201 -34.7527161 -100559.9609375 1039.2989501953125 920.802490234375 -0.7618109022478798 6.99079603950256
3 -7.06737804 1.31737781 -6.10543537 204.968842 -205.679016 -58.9777031 -70174.8515625 2441.724853515625 1183.5899658203125 -1.5208778422936413 9.431842752707537
4 0.243441463 -0.822781682 -0.206593871 -311.742371 -238.41217 186.824127 -144138.75 374.8164367675781 -314.5353088378906 -2.655341358427361 0.8825613121347967
... ... ... ... ... ... ... ... ... ... ... ...
329,995 3.76883793 4.66251659 -4.42904139 107.432999 -2.13771296 17.5130272 -119687.3203125 746.8833618164062 -508.96484375 -1.6499842518381402 7.453831761514681
329,996 9.17409325 -8.87091351 -8.61707687 32.0 108.089264 179.060638 -68933.8046875 2395.633056640625 1275.490234375 -1.4336036247720836 15.398412491068198
329,997 -1.14041007 -8.4957695 2.25749826 8.46711349 -38.2765236 -127.541473 -112580.359375 1182.436279296875 115.58557891845703 -1.9306227597361942 8.864250273925633
329,998 -14.2985935 -5.51750422 -8.65472317 110.221558 -31.3925591 86.2726822 -74862.90625 1324.5926513671875 1057.017333984375 -1.225019818838568 17.601047186042507
329,999 10.5450506 -8.86106777 -4.65835428 -2.10541415 -27.6108856 3.80799961 -95361.765625 351.0955505371094 -309.81439208984375 -2.5689636894079477 14.540181524970293
In [ ]:
Content source: maartenbreddels/vaex
Similar notebooks: