In [1]:
import sys

In [2]:
sys.path.append('/Users/pradap/Documents/Research/Python-Package/anhaid/magellan/')

In [3]:
import magellan as mg
import magellan.catalog.catalog_manager as cm

In [4]:
import os
datasets_path = os.sep.join([mg.get_install_path(), 'datasets', 'test_datasets'])
path_a = os.sep.join([datasets_path, 'A.csv'])
path_b = os.sep.join([datasets_path, 'B.csv'])
path_c = os.sep.join([datasets_path, 'C.csv'])

In [5]:
A = mg.read_csv_metadata(path_a)
B = mg.read_csv_metadata(path_b, key='ID')
C = mg.read_csv_metadata(path_c, ltable=A, rtable=B)

In [6]:
C1 = C[['_id', 'ltable_ID', 'rtable_ID']]
num_ones = 1
num_zeros = len(C1) - num_ones
gold = [0]*num_ones
gold.extend([1]*num_zeros)
C1['gold'] = 0
C1['gold'] = gold
predicted = [1]* (num_zeros + num_ones)
C1['predicted'] = 0
C1['predicted'] = predicted
cm.copy_properties(C, C1)


/Users/pradap/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/Users/pradap/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[6]:
True

In [9]:
results = mg.eval_matches(C1, 'gold', 'predicted')

In [10]:
results.keys()


Out[10]:
odict_keys(['prec_numerator', 'prec_denominator', 'precision', 'recall_numerator', 'recall_denominator', 'recall', 'f1', 'pred_pos_num', 'false_pos_num', 'false_pos_ls', 'pred_neg_num', 'false_neg_num', 'false_neg_ls'])

In [11]:
import six

In [12]:
for k, v in six.iteritems(results):
    print(k), print(v)


prec_numerator
14.0
prec_denominator
15.0
precision
0.9333333333333333
recall_numerator
14.0
recall_denominator
14.0
recall
1.0
f1
0.9655172413793104
pred_pos_num
15.0
false_pos_num
1.0
false_pos_ls
[('a1', 'b1')]
pred_neg_num
0.0
false_neg_num
0.0
false_neg_ls
[]

In [14]:
results = mg.eval_matches(C1, 'predicted', 'gold')

In [15]:
for k, v in six.iteritems(results):
    print(k), print(v)


prec_numerator
14.0
prec_denominator
14.0
precision
1.0
recall_numerator
14.0
recall_denominator
15.0
recall
0.9333333333333333
f1
0.9655172413793104
pred_pos_num
14.0
false_pos_num
0.0
false_pos_ls
[]
pred_neg_num
1.0
false_neg_num
1.0
false_neg_ls
[('a1', 'b1')]