In [1]:
import pandas as pd
import gtable as gt
import numpy as np
# Small benchmark input for pandas: two 4-row frames with columns A-D whose
# cells are "<column letter><row label>" strings ('A0' ... 'D7').
# df1 carries row labels 0-3 and df2 carries 4-7, so the two indexes are disjoint.
column_letters = ['A', 'B', 'C', 'D']
first_rows = [0, 1, 2, 3]
second_rows = [4, 5, 6, 7]

df1 = pd.DataFrame(
    {letter: [letter + str(row) for row in first_rows] for letter in column_letters},
    index=first_rows,
)
df2 = pd.DataFrame(
    {letter: [letter + str(row) for row in second_rows] for letter in column_letters},
    index=second_rows,
)
# gtable counterparts of the two small string frames: same A-D columns and the
# same cell values, with the row key stored in an explicit 'idx' column (the
# column the join cell below joins on).
# The 'idx' values mirror the DataFrame index labels (0-3 and 4-7) so that both
# libraries are timed on the same keys, matching how the larger benchmarks in
# this notebook set 'idx' equal to the pandas index.
t1 = gt.Table({'A': ['A0', 'A1', 'A2', 'A3'],
               'B': ['B0', 'B1', 'B2', 'B3'],
               'C': ['C0', 'C1', 'C2', 'C3'],
               'D': ['D0', 'D1', 'D2', 'D3'],
               'idx': [0, 1, 2, 3]})
t2 = gt.Table({'A': ['A4', 'A5', 'A6', 'A7'],
               'B': ['B4', 'B5', 'B6', 'B7'],
               'C': ['C4', 'C5', 'C6', 'C7'],
               'D': ['D4', 'D5', 'D6', 'D7'],
               'idx': [4, 5, 6, 7]})
In [2]:
%%timeit
# pandas baseline: stack df1 and df2 row-wise. The names df1/df2 are rebound by
# later setup cells, so this times whichever pair was defined most recently
# (the 4-row string frames when the notebook is run top to bottom).
pd.concat([df1, df2])
In [3]:
%%timeit
# gtable counterpart: full outer join of t1 and t2 on the 'idx' column. t1/t2
# are rebound by later setup cells, so this times the most recently defined pair.
# NOTE(review): check_sorted=False presumably skips a sortedness check on the
# key column -- confirm against the gtable documentation.
gt.full_outer_join(t1, t2, 'idx', check_sorted=False)
In [4]:
# Medium benchmark input for pandas: df1 has 100 rows and seven float columns
# (A-G); df2 has 100 rows but only columns A and G, so the concatenated result
# has to fill B-F for df2's rows. Index labels are 0-99 and 100-199 (disjoint).
#
# The values come from a locally seeded generator instead of the global,
# unseeded np.random state, so Restart & Run All rebuilds exactly the same
# frames and the timing below always runs on the same data.
rng = np.random.RandomState(0)
df1 = pd.DataFrame({col: rng.rand(100) for col in 'ABCDEFG'},
                   index=np.arange(100))
df2 = pd.DataFrame({col: rng.rand(100) for col in 'AG'},
                   index=np.arange(100, 200))
In [5]:
%%timeit
# pandas baseline on the most recently defined df1/df2 (the 100-row frames with
# 7 and 2 float columns when the notebook is run top to bottom).
pd.concat([df1, df2])
In [6]:
# Medium benchmark input for gtable: t1 has seven float columns (A-G) of length
# 100, t2 has only A and G. The row key lives in the explicit 'idx' column
# (0-99 for t1, 100-199 for t2), which is the column the join cell below uses.
#
# The values come from a locally seeded generator instead of the global,
# unseeded np.random state, so Restart & Run All rebuilds exactly the same
# tables and the timing below always runs on the same data.
rng = np.random.RandomState(0)
t1 = gt.Table({'A': rng.rand(100),
               'B': rng.rand(100),
               'C': rng.rand(100),
               'D': rng.rand(100),
               'E': rng.rand(100),
               'F': rng.rand(100),
               'G': rng.rand(100),
               'idx': np.arange(100)})
t2 = gt.Table({'A': rng.rand(100),
               'G': rng.rand(100),
               'idx': np.arange(100, 200)})
In [7]:
%%timeit
# gtable full outer join on 'idx' for the most recently defined t1/t2 (the
# 100-row tables with 7 and 2 float columns when run top to bottom).
# NOTE(review): check_sorted=False presumably skips a sortedness check on the
# key column -- confirm against the gtable documentation.
gt.full_outer_join(t1, t2, 'idx', check_sorted=False)
In [8]:
# Large benchmark input for pandas: two frames of 100000 rows with the same
# three float columns (A-C). Index labels are 0-99999 and 100000-199999
# (disjoint).
#
# The values come from a locally seeded generator instead of the global,
# unseeded np.random state, so Restart & Run All rebuilds exactly the same
# frames and the timing below always runs on the same data.
rng = np.random.RandomState(0)
df1 = pd.DataFrame({col: rng.rand(100000) for col in 'ABC'},
                   index=np.arange(100000))
df2 = pd.DataFrame({col: rng.rand(100000) for col in 'ABC'},
                   index=np.arange(100000, 200000))
In [9]:
%%timeit
# pandas baseline on the most recently defined df1/df2 (the 100000-row,
# 3-column float frames when the notebook is run top to bottom).
pd.concat([df1, df2])
In [10]:
# Large benchmark input for gtable: two tables with three float columns (A-C)
# of length 100000. The row key lives in the explicit 'idx' column (0-99999 for
# t1, 100000-199999 for t2), which is the column the join cell below uses.
#
# The values come from a locally seeded generator instead of the global,
# unseeded np.random state, so Restart & Run All rebuilds exactly the same
# tables and the timing below always runs on the same data.
rng = np.random.RandomState(0)
t1 = gt.Table({'A': rng.rand(100000),
               'B': rng.rand(100000),
               'C': rng.rand(100000),
               'idx': np.arange(100000)})
t2 = gt.Table({'A': rng.rand(100000),
               'B': rng.rand(100000),
               'C': rng.rand(100000),
               'idx': np.arange(100000, 200000)})
In [11]:
%%timeit
# gtable full outer join on 'idx' for the most recently defined t1/t2 (the
# 100000-row, 3-column float tables when run top to bottom).
# NOTE(review): check_sorted=False presumably skips a sortedness check on the
# key column -- confirm against the gtable documentation.
gt.full_outer_join(t1, t2, 'idx', check_sorted=False)
In [ ]: