In [1]:
import sys
In [2]:
# Make the local Magellan checkout importable.
# The location can be overridden with the MAGELLAN_PATH environment variable so
# the notebook is not tied to one machine; the original path stays the default.
import os

magellan_src = os.environ.get(
    'MAGELLAN_PATH',
    '/Users/pradap/Documents/Research/Python-Package/anhaid/magellan/',
)
# Guard the append so re-running this cell does not pile up duplicate entries.
if magellan_src not in sys.path:
    sys.path.append(magellan_src)
In [3]:
import magellan as mg
import magellan.catalog.catalog_manager as cm
In [4]:
import os
# Locate the bundled test datasets under the Magellan install directory.
# os.path.join is used instead of os.sep.join so a trailing separator on the
# install path does not produce a doubled separator.
datasets_path = os.path.join(mg.get_install_path(), 'datasets', 'test_datasets')
path_a = os.path.join(datasets_path, 'A.csv')
path_b = os.path.join(datasets_path, 'B.csv')
path_c = os.path.join(datasets_path, 'C.csv')
In [5]:
# Read the three CSV files through mg.read_csv_metadata.
# NOTE(review): A is read without an explicit key while B passes key='ID' --
# presumably A's key is picked up from metadata stored alongside A.csv; confirm.
A = mg.read_csv_metadata(path_a)
B = mg.read_csv_metadata(path_b, key='ID')
# C is read with A and B passed as its ltable / rtable.
C = mg.read_csv_metadata(path_c, ltable=A, rtable=B)
In [6]:
# Build a small labelled frame from C to exercise mg.eval_matches.
# .copy() makes C1 an independent frame, so adding columns below does not
# write into a slice of C (which triggers pandas' SettingWithCopyWarning).
C1 = C[['_id', 'ltable_ID', 'rtable_ID']].copy()

# NOTE(review): the names read inverted relative to how they are used below --
# `num_ones` is the number of 0 labels and `num_zeros` the number of 1 labels
# placed in `gold`. Values are kept exactly as before; confirm the intent.
num_ones = 1
num_zeros = len(C1) - num_ones

# gold: `num_ones` zeros followed by `num_zeros` ones.
gold = [0] * num_ones
gold.extend([1] * num_zeros)
C1['gold'] = gold

# predicted: every row labelled 1.
predicted = [1] * (num_zeros + num_ones)
C1['predicted'] = predicted

# Copy C's catalog properties over to C1; left as the last expression so the
# cell displays the call's return value.
cm.copy_properties(C, C1)
Out[6]:
In [9]:
# Run eval_matches on C1, passing 'gold' and then 'predicted' as the column names.
# NOTE(review): presumably the order is (gold column, predicted column) --
# confirm against eval_matches' signature.
results = mg.eval_matches(C1, 'gold', 'predicted')
In [10]:
# Show which keys the object returned by eval_matches exposes.
results.keys()
Out[10]:
In [11]:
import six
In [12]:
# Print every entry of `results`: the key on one line, its value on the next.
# Plain .items() iterates any mapping, and two separate print calls avoid
# building a throwaway tuple just for the side effects.
for key, value in results.items():
    print(key)
    print(value)
In [14]:
# Same call as the earlier eval_matches cell, but with the two column names
# swapped ('predicted' first, 'gold' second). This rebinds `results`, so the
# earlier value is no longer available afterwards.
# NOTE(review): confirm the swapped order is intentional.
results = mg.eval_matches(C1, 'predicted', 'gold')
In [15]:
# Print every entry of `results`: the key on one line, its value on the next.
# Plain .items() iterates any mapping, and two separate print calls avoid
# building a throwaway tuple just for the side effects.
for key, value in results.items():
    print(key)
    print(value)