In [ ]:
import random
from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition
In [ ]:
# Load the parsed sentences of the English training corpus and draw a
# fixed-size training subset from them.
data = dataset.get_english_train_corpus().parsed_sents()
# Seed the RNG immediately before sampling so the same 200 sentences are
# selected on every run.
random.seed(1234)
# NOTE(review): random.sample raises ValueError if the corpus holds fewer
# than 200 sentences - presumably it is always larger; confirm.
subdata = random.sample(data, 200)
In [ ]:
# Instantiate the parser, handing it the Transition and FeatureExtractor
# classes (the classes themselves are passed, not instances).
tp = TransitionParser(Transition, FeatureExtractor)
In [ ]:
# Train the parser on the sampled English sentences.
tp.train(subdata)
In [ ]:
# Save the trained parser under 'english.model' (a relative path, so it is
# resolved against the current working directory).
tp.save('english.model')
In [ ]:
# Load the parsed sentences of the English test corpus and run the trained
# parser over them. `testdata` is kept because the evaluation step compares
# it against `parsed`.
testdata = dataset.get_english_test_corpus().parsed_sents()
# presumably yields one predicted parse per test sentence - confirm against
# TransitionParser.parse
parsed = tp.parse(testdata)
In [ ]:
# Dump every predicted parse to 'english.conll', each followed by a newline,
# then score the predictions against the test sentences.
#
# The handle is opened in binary mode because the CoNLL text is encoded to
# UTF-8 bytes before it is written; a text-mode handle rejects bytes on
# Python 3 and may rewrite line endings on some platforms.
with open('english.conll', 'wb') as f:
    for p in parsed:
        # NOTE(review): the argument 10 presumably selects the 10-column
        # CoNLL layout - confirm against to_conll's documentation.
        f.write(p.to_conll(10).encode('utf-8'))
        f.write(b'\n')

# Evaluation only needs the in-memory parses, so it runs after the file
# has been closed.
ev = DependencyEvaluator(testdata, parsed)
# ev.eval() is unpacked into the two placeholders below. The parenthesised
# single-argument form prints identically on Python 2 and Python 3.
print("LAS: {} \nUAS: {}".format(*ev.eval()))
In [ ]:
# Load the parsed sentences of the Swedish training corpus and draw a
# fixed-size training subset from them. This rebinds `data` and `subdata`.
data = dataset.get_swedish_train_corpus().parsed_sents()
# Seed the RNG immediately before sampling so the same 200 sentences are
# selected on every run.
random.seed(5678)
# NOTE(review): random.sample raises ValueError if the corpus holds fewer
# than 200 sentences - presumably it is always larger; confirm.
subdata = random.sample(data, 200)
In [ ]:
# Build a fresh parser (rebinding `tp`), train it on the sampled Swedish
# sentences, and save it under 'swedish.model'.
tp = TransitionParser(Transition, FeatureExtractor)
tp.train(subdata)
tp.save('swedish.model')
In [ ]:
# Load the parsed sentences of the Swedish test corpus and run the trained
# parser over them. `testdata` is kept because the evaluation step compares
# it against `parsed`.
testdata = dataset.get_swedish_test_corpus().parsed_sents()
# presumably yields one predicted parse per test sentence - confirm against
# TransitionParser.parse
parsed = tp.parse(testdata)
In [ ]:
# Dump every predicted parse to 'swedish.conll', each followed by a newline,
# then score the predictions against the test sentences.
#
# The handle is opened in binary mode because the CoNLL text is encoded to
# UTF-8 bytes before it is written; a text-mode handle rejects bytes on
# Python 3 and may rewrite line endings on some platforms.
with open('swedish.conll', 'wb') as f:
    for p in parsed:
        # NOTE(review): the argument 10 presumably selects the 10-column
        # CoNLL layout - confirm against to_conll's documentation.
        f.write(p.to_conll(10).encode('utf-8'))
        f.write(b'\n')

# Evaluation only needs the in-memory parses, so it runs after the file
# has been closed.
ev = DependencyEvaluator(testdata, parsed)
# ev.eval() is unpacked into the two placeholders below. The parenthesised
# single-argument form prints identically on Python 2 and Python 3.
print("LAS: {} \nUAS: {}".format(*ev.eval()))