In [ ]:
import random
from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

English data


In [ ]:
# Load the parsed English training sentences, then draw a 200-sentence
# sample to train on.
english_train_corpus = dataset.get_english_train_corpus()
data = english_train_corpus.parsed_sents()

# Seeding the global generator right before sampling keeps the chosen subset
# the same from run to run (for the same corpus).
random.seed(1234)
subdata = random.sample(data, k=200)

In [ ]:
# Build a parser from the imported Transition and FeatureExtractor objects;
# both are passed as-is (not instantiated here).
tp = TransitionParser(Transition, FeatureExtractor)

In [ ]:
# Train on the seeded English sample held in subdata.
tp.train(subdata)

In [ ]:
# Save the parser to english.model; the path is relative, so the file is
# created in the current working directory.
tp.save('english.model')

In [ ]:
# Fetch the English test sentences and run the parser over them. Both
# testdata and parsed are read again by the output/evaluation cell below.
english_test_corpus = dataset.get_english_test_corpus()
testdata = english_test_corpus.parsed_sents()
parsed = tp.parse(testdata)

In [ ]:
# Write the predicted parses to english.conll, then score them against the
# test sentences.
# The file is opened in binary mode because every record is encoded to UTF-8
# bytes before it is written: that works on both Python 2 and Python 3, and
# it keeps '\n' line endings untranslated on every platform.
with open('english.conll', 'wb') as f:
    for p in parsed:
        # to_conll(10) renders one parse as text; the 10 is passed through
        # unchanged (presumably the 10-column CoNLL layout -- confirm in
        # providedcode).
        f.write(p.to_conll(10).encode('utf-8'))
        f.write(b'\n')

ev = DependencyEvaluator(testdata, parsed)
# ev.eval() is unpacked into the two placeholders, labelled LAS then UAS.
# A parenthesised single-argument print runs on Python 2 and Python 3 alike.
print("LAS: {} \nUAS: {}".format(*ev.eval()))

Swedish data


In [ ]:
# Load the parsed Swedish training sentences, then draw a 200-sentence
# sample to train on.
swedish_train_corpus = dataset.get_swedish_train_corpus()
data = swedish_train_corpus.parsed_sents()

# Seeding the global generator right before sampling keeps the chosen subset
# the same from run to run (for the same corpus).
random.seed(5678)
subdata = random.sample(data, k=200)

In [ ]:
# Create a new parser instance for Swedish; this rebinds tp, so the English
# parser object is no longer reachable through that name.
tp = TransitionParser(Transition, FeatureExtractor)
# Train on the seeded Swedish sample held in subdata.
tp.train(subdata)
# Save to swedish.model; the path is relative, so the file is created in the
# current working directory.
tp.save('swedish.model')

In [ ]:
# Fetch the Swedish test sentences and run the parser over them. Both
# testdata and parsed are read again by the output/evaluation cell below.
swedish_test_corpus = dataset.get_swedish_test_corpus()
testdata = swedish_test_corpus.parsed_sents()
parsed = tp.parse(testdata)

In [ ]:
# Write the predicted parses to swedish.conll, then score them against the
# test sentences.
# The file is opened in binary mode because every record is encoded to UTF-8
# bytes before it is written: that works on both Python 2 and Python 3, and
# it keeps '\n' line endings untranslated on every platform.
with open('swedish.conll', 'wb') as f:
    for p in parsed:
        # to_conll(10) renders one parse as text; the 10 is passed through
        # unchanged (presumably the 10-column CoNLL layout -- confirm in
        # providedcode).
        f.write(p.to_conll(10).encode('utf-8'))
        f.write(b'\n')

ev = DependencyEvaluator(testdata, parsed)
# ev.eval() is unpacked into the two placeholders, labelled LAS then UAS.
# A parenthesised single-argument print runs on Python 2 and Python 3 alike.
print("LAS: {} \nUAS: {}".format(*ev.eval()))