In [1]:
import pandas as pd
import gtable as gt
import numpy as np

# Small pandas fixtures: two 4x4 frames of string labels. Each cell holds
# "<column letter><row label>", and the row labels (0-3 for df1, 4-7 for df2)
# are also used as the integer index, so the two frames do not overlap.
df1 = pd.DataFrame(
    {col: [col + str(row) for row in range(0, 4)] for col in 'ABCD'},
    index=list(range(0, 4)),
)

df2 = pd.DataFrame(
    {col: [col + str(row) for row in range(4, 8)] for col in 'ABCD'},
    index=list(range(4, 8)),
)

# gtable counterparts of df1/df2. The row labels travel in an explicit 'idx'
# column, which the join cell below uses as its key.
# The keys match the index of the pandas frames (0-3 and 4-7) so both
# libraries are timed on the same data, as in the larger cases further down.
t1 = gt.Table({'A': ['A0', 'A1', 'A2', 'A3'],
               'B': ['B0', 'B1', 'B2', 'B3'],
               'C': ['C0', 'C1', 'C2', 'C3'],
               'D': ['D0', 'D1', 'D2', 'D3'],
               'idx': [0, 1, 2, 3]})

t2 = gt.Table({'A': ['A4', 'A5', 'A6', 'A7'],
               'B': ['B4', 'B5', 'B6', 'B7'],
               'C': ['C4', 'C5', 'C6', 'C7'],
               'D': ['D4', 'D5', 'D6', 'D7'],
               'idx': [4, 5, 6, 7]})

In [2]:
%%timeit
# pandas baseline, small case: stack the two 4x4 string frames row-wise
# (pd.concat concatenates along the rows by default).
pd.concat([df1, df2])


605 µs ± 82.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In [3]:
%%timeit
# gtable, small case: full outer join of t1 and t2 on their 'idx' column.
# NOTE(review): check_sorted=False presumably skips verifying that 'idx' is
# already sorted -- confirm against the gtable documentation.
gt.full_outer_join(t1, t2, 'idx', check_sorted=False)


208 µs ± 925 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In [4]:
# Medium pandas fixtures: 100 rows each, with mismatched column sets
# (df1 has A-G, df2 only A and G) and disjoint indices (0-99 and 100-199).
#
# The values come from a locally seeded generator, so the frames are identical
# on every Restart & Run All and NumPy's global random state is left untouched.
random_state = np.random.RandomState(0)

df1 = pd.DataFrame({col: random_state.rand(100) for col in 'ABCDEFG'},
                   index=np.arange(100))

df2 = pd.DataFrame({col: random_state.rand(100) for col in 'AG'},
                   index=np.arange(100, 200))

In [5]:
%%timeit
# pandas baseline, medium case: row-wise concat of a 7-column and a 2-column
# frame (100 rows each). With concat's default join='outer', the columns
# missing from df2 are filled with NaN in the result.
pd.concat([df1, df2])


1.37 ms ± 122 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In [6]:
# Medium gtable fixtures: 100 rows each, with mismatched column sets
# (t1 has A-G, t2 only A and G) and disjoint 'idx' keys (0-99 and 100-199).
#
# The values come from a locally seeded generator, so the tables are identical
# on every Restart & Run All and NumPy's global random state is left untouched.
random_state = np.random.RandomState(0)

t1 = gt.Table(dict({col: random_state.rand(100) for col in 'ABCDEFG'},
                   idx=np.arange(100)))

t2 = gt.Table(dict({col: random_state.rand(100) for col in 'AG'},
                   idx=np.arange(100, 200)))

In [7]:
%%timeit
# gtable, medium case: full outer join on 'idx' of a table with columns A-G
# and a table with only columns A and G (100 rows each, disjoint keys).
# NOTE(review): check_sorted=False presumably skips verifying that 'idx' is
# already sorted -- confirm against the gtable documentation.
gt.full_outer_join(t1, t2, 'idx', check_sorted=False)


257 µs ± 2.53 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In [8]:
# Large pandas fixtures: 100000 rows each, same three columns (A, B, C),
# disjoint indices (0-99999 and 100000-199999).
#
# The values come from a locally seeded generator, so the frames are identical
# on every Restart & Run All and NumPy's global random state is left untouched.
random_state = np.random.RandomState(0)

df1 = pd.DataFrame({col: random_state.rand(100000) for col in 'ABC'},
                   index=np.arange(100000))

df2 = pd.DataFrame({col: random_state.rand(100000) for col in 'ABC'},
                   index=np.arange(100000, 200000))

In [9]:
%%timeit
# pandas baseline, large case: row-wise concat of two 100000-row frames
# that share the same three columns.
pd.concat([df1, df2])


1.84 ms ± 101 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In [10]:
# Large gtable fixtures: 100000 rows each, same three data columns (A, B, C),
# disjoint 'idx' keys (0-99999 and 100000-199999).
#
# The values come from a locally seeded generator, so the tables are identical
# on every Restart & Run All and NumPy's global random state is left untouched.
random_state = np.random.RandomState(0)

t1 = gt.Table(dict({col: random_state.rand(100000) for col in 'ABC'},
                   idx=np.arange(100000)))

t2 = gt.Table(dict({col: random_state.rand(100000) for col in 'ABC'},
                   idx=np.arange(100000, 200000)))

In [11]:
%%timeit
# gtable, large case: full outer join on 'idx' of two 100000-row tables
# with the same columns and disjoint keys.
# NOTE(review): check_sorted=False presumably skips verifying that 'idx' is
# already sorted -- confirm against the gtable documentation.
gt.full_outer_join(t1, t2, 'idx', check_sorted=False)


24.3 ms ± 33.4 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

In [ ]: