In [1]:
from gtable import Table
from gtable.joins import inner_join, full_outer_join
import numpy as np

# Left table: key column 'a' holds repeated values (2 and 3 each appear twice);
# column 'b' is given only three values.
t1 = Table()
for column_name, column_values in (
    ('a', [1, 2, 2, 3, 3, 4, 5, 6]),
    ('b', [1, 2, 3]),
):
    t1.add_column(column_name, column_values)

# Right table: three rows with unique 'a' keys, a constant 'b', and an extra
# column 'c' that exists only on this side.
t2 = Table()
for column_name, column_values in (
    ('a', [2, 3, 4]),
    ('b', [1, 1, 1]),
    ('c', [5, 6, 7]),
):
    t2.add_column(column_name, column_values)

In [2]:
# Inner-join t1 and t2 on 'b', a column present in both tables.
t3 = inner_join(t1, t2, 'b')

In [3]:
# Show the raw column arrays held in the joined table's `data` attribute.
t3.data


Out[3]:
[array([1, 1, 1]), array([1, 1, 1]), array([5, 6, 7])]

In [4]:
%%timeit
# Time the same inner join on 'b' that was used to build t3.
inner_join(t1, t2, 'b')


202 µs ± 1.03 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

In [5]:
# Convert both gtable tables to pandas objects (t1 first, then t2) so they can
# be inspected with pandas' display.
df1, df2 = (table.to_pandas() for table in (t1, t2))

In [ ]:
# Column 'b' was given 3 values against 8 for 'a'; the rows it lacks show as NaN.
df1


Out[ ]:
a b
0 1 1.0
1 2 2.0
2 2 3.0
3 3 NaN
4 3 NaN
5 4 NaN
6 5 NaN
7 6 NaN

In [2]:
# NOTE(review): this call currently fails with NameError ("name 'data' is not
# defined") raised inside full_outer_join in gtable/joins.py, after printing
# what looks like leftover debug output. t4 is never assigned until that is
# fixed in the library.
t4 = full_outer_join(t1, t2, 'a')


[1 2 2 3 3 4 5 6] [2 3 4]
1 2 0 0
False False
2 2 1 0
False False
2 3 2 1
False False
3 3 3 1
False False
3 4 4 2
False False
4 4 5 2
False True
5 4 6 2
False True
5 4 6 2
False True
5 4 6 2
False True
5 4 6 2
False True
5 4 6 2
False True
11
[1 2 2 3 3 4 4 4 4 4 4] [0 1 2 3 4 5 6 6 6 6 6] [0 0 1 1 2 2 2 2 2 2 2]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-b9825a6aa391> in <module>()
----> 1 t4 = full_outer_join(t1, t2, 'a')

~/projects/gtable/gtable/joins.py in full_outer_join(table_left, table_right, column)
    138 
    139     res = Table()
--> 140     res.data = data
    141     res.index = np.vstack(index)
    142     res.keys = keys

NameError: name 'data' is not defined

In [ ]: