In [1]:
# Import py_entitymatching package
import py_entitymatching as em
import os
import pandas as pd
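Next, read the sample input table. The same CSV file is loaded twice, as `A` and `B`, so that each data exploration tool below works on its own table.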
In [2]:
# Locate the 'datasets' directory under the py_entitymatching install path
datasets_dir = os.path.join(em.get_install_path(), 'datasets')
# Build the path of the demo input table; os.path.join inserts the
# platform-specific separator and avoids doubling it if one is already present
path = os.path.join(datasets_dir, 'dblp_demo.csv')
In [3]:
# Read the same CSV file into two tables, setting the 'id' column as the key
# attribute of each. A is explored with OpenRefine and B with pandastable below.
A = em.read_csv_metadata(path, key='id')
B = em.read_csv_metadata(path, key='id')
# Preview the first rows of A
A.head()
Out[3]:
In [4]:
# Launch the OpenRefine GUI to explore table A. The returned project
# handle `p` is kept so the data can be exported back to a DataFrame afterwards.
p = em.data_explore_openrefine(A, name='Table')
In [5]:
# Save the OpenRefine project back into our DataFrame.
# After export_pandas_frame is called, the OpenRefine project is deleted automatically.
# NOTE(review): A is rebound to the exported frame here; presumably the key
# metadata set by read_csv_metadata does not carry over to it — confirm, and
# re-register the key if later steps rely on it.
A = p.export_pandas_frame()
In [6]:
# Preview the first rows of the table exported from OpenRefine
A.head()
Out[6]:
In [7]:
# Launch the pandastable GUI to explore table B.
# This call blocks the notebook until the GUI window is closed.
em.data_explore_pandastable(B)
In [8]:
# Preview the first rows of B after the pandastable GUI has been closed
B.head()
Out[8]:
In [ ]: