Exercise 1)
In [1]:
# if two chunks follow each other, it would not be possible to make clear
# that they are two chunks instead of one chunk consisting of two words
# and also not where the first ends and the second begins
Exercise 2)
In [2]:
# singular noun phrases: <DT>?<JJ.*>*<NN.*>+
# plural noun phrases: <DT>?<JJ.*>*<NN.*>*<NNS>+
Exercise 3)
In [3]:
import nltk, re, pprint, random
from nltk.corpus import conll2000
In [4]:
numSents = len(conll2000.chunked_sents('train.txt'))
for i in xrange(10):
randomIndex = random.randint(0, numSents - 1)
print conll2000.chunked_sents('train.txt', chunk_types=['PP'])[randomIndex]
(S
Currently/RB
,/,
the/DT
Transportation/NNP
Department/NNP
does/VBZ
n't/RB
have/VB
the/DT
authority/NN
to/TO
block/VB
a/DT
takeover/NN
(PP in/IN)
advance/NN
./.)
(S
British/NNP
Air/NNP
was/VBD
originally/RB
attracted/VBN
(PP to/TO)
the/DT
chance/NN
(PP of/IN)
obtaining/VBG
a/DT
15/CD
%/NN
stake/NN
(PP in/IN)
the/DT
company/NN
,/,
but/CC
was/VBD
n't/RB
particularly/RB
happy/JJ
(PP with/IN)
paying/VBG
$/$
750/CD
million/CD
./.)
(S
That/DT
is/VBZ
usually/RB
measured/VBN
(PP by/IN)
the/DT
standard/JJ
deviation/NN
,/,
or/CC
divergence/NN
,/,
(PP of/IN)
annual/JJ
results/NNS
(PP from/IN)
the/DT
average/JJ
return/NN
(PP over/IN)
time/NN
./.)
(S
Jeffrey/NNP
Tarter/NNP
,/,
editor/NN
(PP of/IN)
SoftLetter/NNP
,/,
a/DT
Watertown/NNP
,/,
Mass./NNP
,/,
industry/NN
newsletter/NN
,/,
says/VBZ
:/:
``/``
I/PRP
've/VBP
seen/VBN
a/DT
lot/NN
(PP of/IN)
people/NNS
fooling/VBG
around/IN
(PP on/IN)
the/DT
fringes/NNS
(PP of/IN)
filtering/VBG
information/NN
./.)
(S
Amazing/JJ
what/WP
it/PRP
does/VBZ
(PP to/TO)
one/CD
's/POS
heart/NN
rate/NN
and/CC
one/CD
's/POS
short-term/JJ
memory/NN
./.)
(S
The/DT
beneficiaries/NNS
were/VBD
those/DT
financiers/NNS
whose/WP$
icon/NN
was/VBD
the/DT
topic/NN
figure/NN
(PP of/IN)
'80s/NNS
capitalism/NN
,/,
Michael/NNP
Milken/NNP
's/POS
$/$
517/CD
million/CD
salary/NN
(PP in/IN)
one/CD
year/NN
./.)
(S
(PP In/IN)
Illinois/NNP
,/,
lawmakers/NNS
will/MD
vote/VB
(PP before/IN)
next/JJ
spring/NN
(PP on/IN)
legislation/NN
requiring/VBG
physicians/NNS
to/TO
perform/VB
tests/NNS
(PP on/IN)
fetuses/NNS
(PP at/IN)
20/CD
weeks/NNS
to/TO
determine/VB
their/PRP$
gestational/JJ
age/NN
,/,
weight/NN
and/CC
lung/NN
maturity/NN
along/IN
(PP with/IN)
a/DT
provision/NN
requiring/VBG
that/IN
,/,
if/IN
fetuses/NNS
survive/VBP
an/DT
abortion/NN
,/,
a/DT
second/JJ
doctor/NN
must/MD
be/VB
(PP on/IN)
hand/NN
to/TO
help/VB
it/PRP
survive/VB
./.)
(S
Bonn/NNP
and/CC
Washington/NNP
are/VBP
leading/VBG
supporters/NNS
(PP of/IN)
Poland/NNP
's/POS
request/NN
(PP for/IN)
a/DT
$/$
1/CD
billion/CD
stand-by/JJ
credit/NN
(PP from/IN)
the/DT
International/NNP
Monetary/NNP
Fund/NNP
./.)
(S
(PP In/IN)
the/DT
nine/CD
months/NNS
,/,
net/NN
rose/VBD
35/CD
%/NN
(PP to/TO)
$/$
120.1/CD
million/CD
,/,
or/CC
$/$
1.64/CD
a/DT
share/NN
,/,
(PP from/IN)
$/$
89.2/CD
million/CD
,/,
or/CC
$/$
1.22/CD
a/DT
share/NN
,/,
a/DT
year/NN
earlier/RBR
./.)
(S
``/``
That/DT
's/VBZ
what/WP
they/PRP
're/VBP
(PP after/IN)
./.
''/'')
In [5]:
# possible tags: TO, IN, RB IN, VBG, JJ IN
In [6]:
grammar = r"""
PP: {<RB|JJ\$>?<IN>} # e.g. such as
{<TO|VBG>} # e.g. to, regarding
"""
cp = nltk.RegexpParser(grammar)
brown = nltk.corpus.brown
for sent in brown.tagged_sents()[:10]:
tree = cp.parse(sent)
print tree
(S
The/AT
Fulton/NP-TL
County/NN-TL
Grand/JJ-TL
Jury/NN-TL
said/VBD
Friday/NR
an/AT
investigation/NN
(PP of/IN)
Atlanta's/NP$
recent/JJ
primary/NN
election/NN
produced/VBD
``/``
no/AT
evidence/NN
''/''
that/CS
any/DTI
irregularities/NNS
took/VBD
place/NN
./.)
(S
The/AT
jury/NN
further/RBR
said/VBD
(PP in/IN)
term-end/NN
presentments/NNS
that/CS
the/AT
City/NN-TL
Executive/JJ-TL
Committee/NN-TL
,/,
which/WDT
had/HVD
over-all/JJ
charge/NN
(PP of/IN)
the/AT
election/NN
,/,
``/``
deserves/VBZ
the/AT
praise/NN
and/CC
thanks/NNS
(PP of/IN)
the/AT
City/NN-TL
of/IN-TL
Atlanta/NP-TL
''/''
(PP for/IN)
the/AT
manner/NN
(PP in/IN)
which/WDT
the/AT
election/NN
was/BEDZ
conducted/VBN
./.)
(S
The/AT
September-October/NP
term/NN
jury/NN
had/HVD
been/BEN
charged/VBN
(PP by/IN)
Fulton/NP-TL
Superior/JJ-TL
Court/NN-TL
Judge/NN-TL
Durwood/NP
Pye/NP
(PP to/TO)
investigate/VB
reports/NNS
(PP of/IN)
possible/JJ
``/``
irregularities/NNS
''/''
(PP in/IN)
the/AT
hard-fought/JJ
primary/NN
which/WDT
was/BEDZ
won/VBN
(PP by/IN)
Mayor-nominate/NN-TL
Ivan/NP
Allen/NP
Jr./NP
./.)
(S
``/``
Only/RB
a/AT
relative/JJ
handful/NN
(PP of/IN)
such/JJ
reports/NNS
was/BEDZ
received/VBN
''/''
,/,
the/AT
jury/NN
said/VBD
,/,
``/``
(PP considering/IN)
the/AT
widespread/JJ
interest/NN
(PP in/IN)
the/AT
election/NN
,/,
the/AT
number/NN
(PP of/IN)
voters/NNS
and/CC
the/AT
size/NN
(PP of/IN)
this/DT
city/NN
''/''
./.)
(S
The/AT
jury/NN
said/VBD
it/PPS
did/DOD
find/VB
that/CS
many/AP
(PP of/IN)
Georgia's/NP$
registration/NN
and/CC
election/NN
laws/NNS
``/``
are/BER
outmoded/JJ
or/CC
inadequate/JJ
and/CC
often/RB
ambiguous/JJ
''/''
./.)
(S
It/PPS
recommended/VBD
that/CS
Fulton/NP
legislators/NNS
act/VB
``/``
(PP to/TO)
have/HV
these/DTS
laws/NNS
studied/VBN
and/CC
revised/VBN
(PP to/IN)
the/AT
end/NN
(PP of/IN)
(PP modernizing/VBG)
and/CC
(PP improving/VBG)
them/PPO
''/''
./.)
(S
The/AT
grand/JJ
jury/NN
commented/VBD
(PP on/IN)
a/AT
number/NN
(PP of/IN)
other/AP
topics/NNS
,/,
(PP among/IN)
them/PPO
the/AT
Atlanta/NP
and/CC
Fulton/NP-TL
County/NN-TL
(PP purchasing/VBG)
departments/NNS
which/WDT
it/PPS
said/VBD
``/``
are/BER
well/QL
operated/VBN
and/CC
follow/VB
generally/RB
accepted/VBN
practices/NNS
which/WDT
inure/VB
(PP to/IN)
the/AT
best/JJT
interest/NN
(PP of/IN)
both/ABX
governments/NNS
''/''
./.)
(S Merger/NN-HL proposed/VBN-HL)
(S
However/WRB
,/,
the/AT
jury/NN
said/VBD
it/PPS
believes/VBZ
``/``
these/DTS
two/CD
offices/NNS
should/MD
be/BE
combined/VBN
(PP to/TO)
achieve/VB
greater/JJR
efficiency/NN
and/CC
reduce/VB
the/AT
cost/NN
(PP of/IN)
administration/NN
''/''
./.)
(S
The/AT
City/NN-TL
Purchasing/VBG-TL
Department/NN-TL
,/,
the/AT
jury/NN
said/VBD
,/,
``/``
is/BEZ
(PP lacking/VBG)
(PP in/IN)
experienced/VBN
clerical/JJ
personnel/NNS
as/CS
a/AT
result/NN
(PP of/IN)
city/NN
personnel/NNS
policies/NNS
''/''
./.)
In [7]:
# VBG is problematic of course, might have been a false hit / tag in the first place
Exercise 4
In [8]:
# for some input: get VP chunks from corpus
for i in xrange(10):
print conll2000.chunked_sents('train.txt', chunk_types=['VP'])[i]
(S
Confidence/NN
in/IN
the/DT
pound/NN
(VP is/VBZ widely/RB expected/VBN to/TO take/VB)
another/DT
sharp/JJ
dive/NN
if/IN
trade/NN
figures/NNS
for/IN
September/NNP
,/,
due/JJ
for/IN
release/NN
tomorrow/NN
,/,
(VP fail/VB to/TO show/VB)
a/DT
substantial/JJ
improvement/NN
from/IN
July/NNP
and/CC
August/NNP
's/POS
near-record/JJ
deficits/NNS
./.)
(S
Chancellor/NNP
of/IN
the/DT
Exchequer/NNP
Nigel/NNP
Lawson/NNP
's/POS
restated/VBN
commitment/NN
to/TO
a/DT
firm/NN
monetary/JJ
policy/NN
(VP has/VBZ helped/VBN to/TO prevent/VB)
a/DT
freefall/NN
in/IN
sterling/NN
over/IN
the/DT
past/JJ
week/NN
./.)
(S
But/CC
analysts/NNS
(VP reckon/VBP)
underlying/VBG
support/NN
for/IN
sterling/NN
(VP has/VBZ been/VBN eroded/VBN)
by/IN
the/DT
chancellor/NN
's/POS
failure/NN
(VP to/TO announce/VB)
any/DT
new/JJ
policy/NN
measures/NNS
in/IN
his/PRP$
Mansion/NNP
House/NNP
speech/NN
last/JJ
Thursday/NNP
./.)
(S
This/DT
(VP has/VBZ increased/VBN)
the/DT
risk/NN
of/IN
the/DT
government/NN
(VP being/VBG forced/VBN to/TO increase/VB)
base/NN
rates/NNS
to/TO
16/CD
%/NN
from/IN
their/PRP$
current/JJ
15/CD
%/NN
level/NN
(VP to/TO defend/VB)
the/DT
pound/NN
,/,
economists/NNS
and/CC
foreign/JJ
exchange/NN
market/NN
analysts/NNS
(VP say/VBP)
./.)
(S
``/``
The/DT
risks/NNS
for/IN
sterling/NN
of/IN
a/DT
bad/JJ
trade/NN
figure/NN
(VP are/VBP)
very/RB
heavily/RB
on/IN
the/DT
down/JJ
side/NN
,/,
''/''
(VP said/VBD)
Chris/NNP
Dillow/NNP
,/,
senior/JJ
U.K./NNP
economist/NN
at/IN
Nomura/NNP
Research/NNP
Institute/NNP
./.)
(S
``/``
If/IN
there/EX
(VP is/VBZ)
another/DT
bad/JJ
trade/NN
number/NN
,/,
there/EX
(VP could/MD be/VB)
an/DT
awful/JJ
lot/NN
of/IN
pressure/NN
,/,
''/''
(VP noted/VBD)
Simon/NNP
Briscoe/NNP
,/,
U.K./NNP
economist/NN
for/IN
Midland/NNP
Montagu/NNP
,/,
a/DT
unit/NN
of/IN
Midland/NNP
Bank/NNP
PLC/NNP
./.)
(S
Forecasts/NNS
for/IN
the/DT
trade/NN
figures/NNS
(VP range/VBP)
widely/RB
,/,
but/CC
few/JJ
economists/NNS
(VP expect/VBP)
the/DT
data/NNS
(VP to/TO show/VB)
a/DT
very/RB
marked/VBN
improvement/NN
from/IN
the/DT
#/#
2/CD
billion/CD
-LRB-/(
$/$
3.2/CD
billion/CD
-RRB-/)
deficit/NN
in/IN
the/DT
current/JJ
account/NN
(VP reported/VBD)
for/IN
August/NNP
./.)
(S
The/DT
August/NNP
deficit/NN
and/CC
the/DT
#/#
2.2/CD
billion/CD
gap/NN
(VP registered/VBN)
in/IN
July/NNP
(VP are/VBP topped/VBN)
only/RB
by/IN
the/DT
#/#
2.3/CD
billion/CD
deficit/NN
of/IN
October/NNP
1988/CD
./.)
(S
Sanjay/NNP
Joshi/NNP
,/,
European/JJ
economist/NN
at/IN
Baring/NNP
Brothers/NNPS
&/CC
Co./NNP
,/,
(VP said/VBD)
there/EX
(VP is/VBZ)
no/DT
sign/NN
that/IN
Britain/NNP
's/POS
manufacturing/NN
industry/NN
(VP is/VBZ transforming/VBG)
itself/PRP
(VP to/TO boost/VB)
exports/NNS
./.)
(S
At/IN
the/DT
same/JJ
time/NN
,/,
he/PRP
(VP remains/VBZ)
fairly/RB
pessimistic/JJ
about/IN
the/DT
outlook/NN
for/IN
imports/NNS
,/,
given/VBN
continued/VBD
high/JJ
consumer/NN
and/CC
capital/NN
goods/NNS
inflows/NNS
./.)
In [9]:
# get the chinks of given example sentences (unfinished):
tag_sequences_to_chink = set()
for i in xrange(10):
randomIndex = random.randint(0, numSents - 1)
tree = conll2000.chunked_sents('train.txt', chunk_types=['NP', 'PP'])[randomIndex]
for subtree in tree.subtrees():
if subtree.label() != 'S':
tag_sequences_to_chink.add(tuple([tag for (word, tag) in subtree.leaves()]))
print tag_sequences_to_chink
set([(u'DT', u'CD'), (u'NNP',), (u'JJR', u'NNS'), (u'DT', u'NNP', u'NN'), (u'NN',), (u'TO',), (u'PRP$', u'JJ', u'NNP', u'NNP', u'NNP', u'NN', u'NN'), (u'JJ', u'NN', u'NN'), (u'DT', u'$', u'CD', u'CD', u'NN'), (u'DT',), (u'PRP',), (u'VB', u'CC', u'NNS'), (u'RB', u'CD', u'JJ', u'NN'), (u'POS', u'VBG', u'NN', u'NN'), (u'JJ', u'NN'), (u'NNP', u'NNP'), (u'DT', u'JJ', u'NN'), (u'NNS',), (u'IN',), (u'POS', u'NN'), (u'PRP$', u'NNP', u',', u'NNP', u',', u'NN'), (u'CD',), (u'CD', u'NNS'), (u'NN', u'CC', u'JJ', u'NNS'), (u'DT', u'VBN', u'NN'), (u'CD', u'NN', u'NNS'), (u'DT', u'NN')])
In [22]:
rule_string = r"""
VP:
{<.*>+} # Chunk everything
"""
for tag_seq in tag_sequences_to_chink: # include a chink rule for each sequence determined in the stap before
rule_string += '\n}'
for tag in tag_seq:
rule_string += '<' + tag + '>'
rule_string += '{'
chink_parser = nltk.RegexpParser(rule_string)
for i in xrange(10):
print chink_parser.parse(conll2000.tagged_sents('train.txt')[i])
(S
Confidence/NN
in/IN
the/DT
pound/NN
(VP is/VBZ widely/RB expected/VBN)
to/TO
(VP take/VB)
another/DT
(VP sharp/JJ)
dive/NN
if/IN
trade/NN
figures/NNS
for/IN
September/NNP
(VP ,/, due/JJ)
for/IN
release/NN
tomorrow/NN
(VP ,/, fail/VB)
to/TO
(VP show/VB)
a/DT
(VP substantial/JJ)
improvement/NN
from/IN
July/NNP
(VP and/CC)
August/NNP
(VP 's/POS near-record/JJ)
deficits/NNS
(VP ./.))
(S
Chancellor/NNP
of/IN
the/DT
Exchequer/NNP
Nigel/NNP
Lawson/NNP
(VP 's/POS restated/VBN)
commitment/NN
to/TO
a/DT
firm/NN
(VP monetary/JJ)
policy/NN
(VP has/VBZ helped/VBN)
to/TO
(VP prevent/VB)
a/DT
freefall/NN
in/IN
sterling/NN
over/IN
the/DT
(VP past/JJ)
week/NN
(VP ./.))
(S
(VP But/CC)
analysts/NNS
(VP reckon/VBP underlying/VBG)
support/NN
for/IN
sterling/NN
(VP has/VBZ been/VBN eroded/VBN)
by/IN
the/DT
chancellor/NN
(VP 's/POS)
failure/NN
to/TO
(VP announce/VB)
any/DT
(VP new/JJ)
policy/NN
measures/NNS
in/IN
(VP his/PRP$)
Mansion/NNP
House/NNP
speech/NN
(VP last/JJ)
Thursday/NNP
(VP ./.))
(S
This/DT
(VP has/VBZ increased/VBN)
the/DT
risk/NN
of/IN
the/DT
government/NN
(VP being/VBG forced/VBN)
to/TO
(VP increase/VB)
base/NN
rates/NNS
to/TO
16/CD
%/NN
from/IN
(VP their/PRP$ current/JJ)
15/CD
%/NN
level/NN
to/TO
(VP defend/VB)
the/DT
pound/NN
(VP ,/,)
economists/NNS
(VP and/CC foreign/JJ)
exchange/NN
market/NN
analysts/NNS
(VP say/VBP ./.))
(S
(VP ``/``)
The/DT
risks/NNS
for/IN
sterling/NN
of/IN
a/DT
(VP bad/JJ)
trade/NN
figure/NN
(VP are/VBP very/RB heavily/RB)
on/IN
the/DT
(VP down/JJ)
side/NN
(VP ,/, ''/'' said/VBD)
Chris/NNP
Dillow/NNP
(VP ,/, senior/JJ)
U.K./NNP
economist/NN
at/IN
Nomura/NNP
Research/NNP
Institute/NNP
(VP ./.))
(S
(VP ``/``)
If/IN
(VP there/EX is/VBZ)
another/DT
(VP bad/JJ)
trade/NN
number/NN
(VP ,/, there/EX could/MD be/VB)
an/DT
(VP awful/JJ)
lot/NN
of/IN
pressure/NN
(VP ,/, ''/'' noted/VBD)
Simon/NNP
Briscoe/NNP
(VP ,/,)
U.K./NNP
economist/NN
for/IN
Midland/NNP
Montagu/NNP
(VP ,/,)
a/DT
unit/NN
of/IN
Midland/NNP
Bank/NNP
PLC/NNP
(VP ./.))
(S
Forecasts/NNS
for/IN
the/DT
trade/NN
figures/NNS
(VP range/VBP widely/RB ,/, but/CC few/JJ)
economists/NNS
(VP expect/VBP)
the/DT
data/NNS
to/TO
(VP show/VB)
a/DT
(VP very/RB marked/VBN)
improvement/NN
from/IN
the/DT
(VP #/#)
2/CD
billion/CD
(VP -LRB-/( $/$)
3.2/CD
billion/CD
(VP -RRB-/))
deficit/NN
in/IN
the/DT
(VP current/JJ)
account/NN
(VP reported/VBD)
for/IN
August/NNP
(VP ./.))
(S
The/DT
August/NNP
deficit/NN
(VP and/CC)
the/DT
(VP #/#)
2.2/CD
billion/CD
gap/NN
(VP registered/VBN)
in/IN
July/NNP
(VP are/VBP topped/VBN only/RB)
by/IN
the/DT
(VP #/#)
2.3/CD
billion/CD
deficit/NN
of/IN
October/NNP
1988/CD
(VP ./.))
(S
Sanjay/NNP
Joshi/NNP
(VP ,/, European/JJ)
economist/NN
at/IN
Baring/NNP
(VP Brothers/NNPS &/CC)
Co./NNP
(VP ,/, said/VBD there/EX is/VBZ)
no/DT
sign/NN
that/IN
Britain/NNP
(VP 's/POS)
manufacturing/NN
industry/NN
(VP is/VBZ transforming/VBG)
itself/PRP
to/TO
(VP boost/VB)
exports/NNS
(VP ./.))
(S
At/IN
the/DT
(VP same/JJ)
time/NN
(VP ,/,)
he/PRP
(VP remains/VBZ fairly/RB pessimistic/JJ)
about/IN
the/DT
outlook/NN
for/IN
imports/NNS
(VP ,/, given/VBN continued/VBD high/JJ)
consumer/NN
(VP and/CC)
capital/NN
goods/NNS
inflows/NNS
(VP ./.))
In [20]:
# quite a number of false hits, but maybe quite good given the naive approach / little data used here
Exercise 7
In [24]:
# a)
test_sents = conll2000.chunked_sents('test.txt', chunk_types=['VP'])[:100]
chunkscore = chink_parser.evaluate(test_sents)
print chunkscore
ChunkParse score:
IOB Accuracy: 66.3%%
Precision: 13.0%%
Recall: 40.9%%
F-Measure: 19.8%%
In [28]:
# b)
pprint.pprint(chunkscore.missed())
[ImmutableTree('VP', [(u'added', u'VBD')]),
ImmutableTree('VP', [(u'is', u'VBZ'), (u'newly', u'RB'), (u'created', u'VBN')]),
ImmutableTree('VP', [(u'increased', u'VBN')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'would', u'MD'), (u'be', u'VB')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'buy', u'VB')]),
ImmutableTree('VP', [(u'collapsed', u'VBD')]),
ImmutableTree('VP', [(u'based', u'VBN')]),
ImmutableTree('VP', [(u'happened', u'VBD')]),
ImmutableTree('VP', [(u'served', u'VBN')]),
ImmutableTree('VP', [(u'totaled', u'VBD')]),
ImmutableTree('VP', [(u'was', u'VBD')]),
ImmutableTree('VP', [(u'would', u'MD'), (u'still', u'RB'), (u'get', u'VB')]),
ImmutableTree('VP', [(u'wanted', u'VBD')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'make', u'VB')]),
ImmutableTree('VP', [(u'are', u'VBP'), (u'prepaying', u'VBG')]),
ImmutableTree('VP', [(u'includes', u'VBZ')]),
ImmutableTree('VP', [(u'is', u'VBZ')]),
ImmutableTree('VP', [(u'declined', u'VBD'), (u'to', u'TO'), (u'comment', u'VB')]),
ImmutableTree('VP', [(u'increased', u'VBN')]),
ImmutableTree('VP', [(u'were', u'VBD'), (u"n't", u'RB'), (u'disclosed', u'VBN')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'reviewing', u'VBG')]),
ImmutableTree('VP', [(u'continue', u'VBP'), (u'to', u'TO'), (u'plummet', u'VB')]),
ImmutableTree('VP', [(u'stood', u'VBD'), (u'to', u'TO'), (u'gain', u'VB')]),
ImmutableTree('VP', [(u'focusing', u'VBG')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'agreed', u'VBD'), (u'to', u'TO'), (u'step', u'VB')]),
ImmutableTree('VP', [(u'is', u'VBZ')]),
ImmutableTree('VP', [(u'was', u'VBD'), (u'named', u'VBN')]),
ImmutableTree('VP', [(u'says', u'VBZ')]),
ImmutableTree('VP', [(u'reflecting', u'VBG')]),
ImmutableTree('VP', [(u'posted', u'VBD')]),
ImmutableTree('VP', [(u'were', u'VBD')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'could', u'MD'), (u'be', u'VB'), (u'bumped', u'VBN')]),
ImmutableTree('VP', [(u'had', u'VBD'), (u'bought', u'VBN')]),
ImmutableTree('VP', [(u'increased', u'VBD')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'continued', u'VBD'), (u'to', u'TO'), (u'lead', u'VB')]),
ImmutableTree('VP', [(u'were', u'VBD')]),
ImmutableTree('VP', [(u'reported', u'VBD')]),
ImmutableTree('VP', [(u'had', u'VBD'), (u'been', u'VBN')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'fuel', u'VB')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'supply', u'VB')]),
ImmutableTree('VP', [(u'were', u'VBD')]),
ImmutableTree('VP', [(u'filling', u'VBG')]),
ImmutableTree('VP', [(u'estimated', u'VBD')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'reported', u'VBD')]),
ImmutableTree('VP', [(u'dived', u'VBD')]),
ImmutableTree('VP', [(u'led', u'VBN')]),
ImmutableTree('VP', [(u'bolstered', u'VBN')]),
ImmutableTree('VP', [(u'has', u'VBZ'), (u'lagged', u'VBN')]),
ImmutableTree('VP', [(u'had', u'VBD')]),
ImmutableTree('VP', [(u'slowed', u'VBD')]),
ImmutableTree('VP', [(u'expected', u'VBN'), (u'to', u'TO'), (u'increase', u'VB')]),
ImmutableTree('VP', [(u'resigned', u'VBD')]),
ImmutableTree('VP', [(u'had', u'VBD')]),
ImmutableTree('VP', [(u'has', u'VBZ')]),
ImmutableTree('VP', [(u'blending', u'VBG')]),
ImmutableTree('VP', [(u'angering', u'VBG')]),
ImmutableTree('VP', [(u'left', u'VBD')]),
ImmutableTree('VP', [(u'will', u'MD'), (u'be', u'VB'), (u'done', u'VBN')]),
ImmutableTree('VP', [(u'has', u'VBZ'), (u'given', u'VBN')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'pursue', u'VB')]),
ImmutableTree('VP', [(u'more', u'JJR'), (u'than', u'IN'), (u'offset', u'VB')]),
ImmutableTree('VP', [(u'increased', u'VBN')]),
ImmutableTree('VP', [(u'has', u'VBZ')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'representing', u'VBG')]),
ImmutableTree('VP', [(u'was', u'VBD')]),
ImmutableTree('VP', [(u'closed', u'VBD')]),
ImmutableTree('VP', [(u'agreed', u'VBD'), (u'to', u'TO'), (u'invest', u'VB')]),
ImmutableTree('VP', [(u'took', u'VBD')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'act', u'VB')]),
ImmutableTree('VP', [(u'provide', u'VBP')]),
ImmutableTree('VP', [(u'wo', u'MD'), (u"n't", u'RB'), (u'serve', u'VB')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'fired', u'VBD')]),
ImmutableTree('VP', [(u'citing', u'VBG')]),
ImmutableTree('VP', [(u'reduced', u'VBD')]),
ImmutableTree('VP', [(u'has', u'VBZ')]),
ImmutableTree('VP', [(u'saw', u'VBD')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'lure', u'VB')]),
ImmutableTree('VP', [(u"'s", u'VBZ')]),
ImmutableTree('VP', [(u'continued', u'VBD'), (u'to', u'TO'), (u'pace', u'VB')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'resigned', u'VBD')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'provide', u'VB')]),
ImmutableTree('VP', [(u'led', u'VBN')]),
ImmutableTree('VP', [(u'could', u'MD'), (u'say', u'VB')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'buy', u'VB')]),
ImmutableTree('VP', [(u'said', u'VBD')]),
ImmutableTree('VP', [(u'extending', u'VBG')]),
ImmutableTree('VP', [(u'were', u'VBD'), (u'scheduled', u'VBN'), (u'to', u'TO'), (u'reap', u'VB')]),
ImmutableTree('VP', [(u'raising', u'VBG')]),
ImmutableTree('VP', [(u'based', u'VBN')]),
ImmutableTree('VP', [(u'reflecting', u'VBG')]),
ImmutableTree('VP', [(u'include', u'VBP')]),
ImmutableTree('VP', [(u'was', u'VBD')]),
ImmutableTree('VP', [(u'had', u'VBD')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'stash', u'VB')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'improve', u'VB')]),
ImmutableTree('VP', [(u'would', u'MD'), (u'have', u'VB'), (u'realized', u'VBN')]),
ImmutableTree('VP', [(u'to', u'TO'), (u'close', u'VB')]),
ImmutableTree('VP', [(u'had', u'VBD')]),
ImmutableTree('VP', [(u'has', u'VBZ')]),
ImmutableTree('VP', [(u'was', u'VBD'), (u'named', u'VBN')]),
ImmutableTree('VP', [(u'has', u'VBZ'), (u'been', u'VBN')]),
ImmutableTree('VP', [(u'was', u'VBD')]),
ImmutableTree('VP', [(u'based', u'VBN')])]
In [29]:
pprint.pprint(chunkscore.incorrect())
[ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC')]),
ImmutableTree('VP', [(u'extending', u'VBG'), (u'its', u'PRP$')]),
ImmutableTree('VP', [(u'private', u'JJ')]),
ImmutableTree('VP', [(u'bank', u'VBP'), (u'took', u'VBD')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'said', u'VBD')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC'), (u'net', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'``', u'``')]),
ImmutableTree('VP', [(u'enough', u'RB'), (u'--', u':')]),
ImmutableTree('VP', [(u'initial', u'JJ')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u',', u','), (u'reflecting', u'VBG')]),
ImmutableTree('VP', [(u'similar', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u','), (u'based', u'VBN')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'said', u'VBD'), (u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC')]),
ImmutableTree('VP', [(u'said', u'VBD'), (u'.', u'.')]),
ImmutableTree('VP', [(u'last', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$'), (u'300-a-share', u'JJ')]),
ImmutableTree('VP', [(u"'", u'POS')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'chief', u'JJ'), (u'operating', u'VBG')]),
ImmutableTree('VP', [(u'real', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'composite', u'JJ')]),
ImmutableTree('VP', [(u'additional', u'JJ'), (u'so-called', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u','), (u'bolstered', u'VBN')]),
ImmutableTree('VP', [(u'reduced', u'VBD'), (u'third-quarter', u'JJ'), (u'and', u'CC'), (u'first-nine-month', u'JJ')]),
ImmutableTree('VP', [(u'reported', u'VBD'), (u'robust', u'JJ'), (u'third-quarter', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'lure', u'VB')]),
ImmutableTree('VP', [(u'third', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'third-quarter', u'JJ')]),
ImmutableTree('VP', [(u'continuing', u'VBG')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'was', u'VBD'), (u'$', u'$')]),
ImmutableTree('VP', [(u'were', u'VBD'), (u"n't", u'RB')]),
ImmutableTree('VP', [(u'following', u'VBG')]),
ImmutableTree('VP', [(u',', u','), (u'has', u'VBZ'), (u'been', u'VBN')]),
ImmutableTree('VP', [(u'major', u'JJ')]),
ImmutableTree('VP', [(u'newer', u'JJR'), (u',', u','), (u'big-selling', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'--', u':')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u','), (u'increased', u'VBN')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'increased', u'VBN')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'estimated', u'VBN')]),
ImmutableTree('VP', [(u'Skies', u'NNPS'), (u'--', u':'), (u'free', u'JJ'), (u'first-class', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'or', u'CC'), (u'$', u'$')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'collapsed', u'VBD'), (u'last', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'who', u'WP'), (u'resigned', u'VBD')]),
ImmutableTree('VP', [(u'third-quarter', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'or', u'CC'), (u'$', u'$')]),
ImmutableTree('VP', [(u',', u','), (u'posted', u'VBD')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'fixed', u'VBN'), (u'leading', u'VBG')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'is', u'VBZ')]),
ImmutableTree('VP', [(u'said', u'VBD'), (u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'best', u'JJS')]),
ImmutableTree('VP', [(u',', u','), (u'however', u'RB'), (u',', u','), (u'led', u'VBN')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'step', u'VB'), (u'down', u'RB')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'representing', u'VBG'), (u'general', u'JJ'), (u'and', u'CC'), (u'administrative', u'JJ')]),
ImmutableTree('VP', [(u'include', u'VBP'), (u',', u',')]),
ImmutableTree('VP', [(u',', u','), (u'that', u'WDT'), (u'has', u'VBZ'), (u'given', u'VBN')]),
ImmutableTree('VP', [(u'interim', u'JJ')]),
ImmutableTree('VP', [(u'buy', u'VB')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'ago', u'RB'), (u'.', u'.')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'management-led', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'provide', u'VB'), (u'structural', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'other', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'continue', u'VBP')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'declined', u'VBD')]),
ImmutableTree('VP', [(u'When', u'WRB')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'Including', u'VBG')]),
ImmutableTree('VP', [(u'very', u'RB'), (u'meaningful', u'JJ'), (u"''", u"''")]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'would', u'MD'), (u'have', u'VB'), (u'realized', u'VBN'), (u'had', u'VBN')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC'), (u'their', u'PRP$')]),
ImmutableTree('VP', [(u',', u','), (u'which', u'WDT'), (u'has', u'VBZ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u',', u','), (u'that', u'WDT'), (u'totaled', u'VBD'), (u'about', u'RB'), (u'$', u'$')]),
ImmutableTree('VP', [(u'Pacemakers', u'NNPS')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'compared', u'VBN')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'were', u'VBD'), (u"n't", u'RB'), (u'disclosed', u'VBN'), (u'.', u'.')]),
ImmutableTree('VP', [(u'said', u'VBD'), (u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'estimated', u'VBD'), (u'operating', u'VBG')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'holding', u'VBG')]),
ImmutableTree('VP', [(u'earlier', u'RBR'), (u';', u':'), (u'including', u'VBG'), (u'discontinued', u'VBN')]),
ImmutableTree('VP', [(u'said', u'VBD'), (u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'about', u'RB'), (u'$', u'$')]),
ImmutableTree('VP', [(u'continued', u'VBD')]),
ImmutableTree('VP', [(u'medical-instrument', u'JJ')]),
ImmutableTree('VP', [(u"'s", u'POS'), (u'net', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'automotive', u'JJ'), (u'and', u'CC')]),
ImmutableTree('VP', [(u'paltry', u'JJ'), (u'$', u'$')]),
ImmutableTree('VP', [(u'that', u'WDT'), (u'slowed', u'VBD')]),
ImmutableTree('VP', [(u'world-wide', u'JJ')]),
ImmutableTree('VP', [(u'gain', u'VB')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u','), (u'said', u'VBD')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'tentative', u'JJ')]),
ImmutableTree('VP', [(u'slight', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'dived', u'VBD'), (u'$', u'$')]),
ImmutableTree('VP', [(u'40-year', u'JJ'), (u'old', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'is', u'VBZ'), (u'``', u'``'), (u'comfortable', u'JJ'), (u"''", u"''")]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'were', u'VBD'), (u'scheduled', u'VBN')]),
ImmutableTree('VP', [(u'less', u'JJR')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'combined', u'VBN'), (u'$', u'$')]),
ImmutableTree('VP', [(u'lower', u'JJR')]),
ImmutableTree('VP', [(u'earlier', u'RBR'), (u'.', u'.')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'``', u'``'), (u'But', u'CC')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'apart', u'RB')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'other', u'JJ')]),
ImmutableTree('VP', [(u'executive', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u','), (u'reported', u'VBD')]),
ImmutableTree('VP', [(u"'s", u'POS'), (u'non-prescription', u'JJ')]),
ImmutableTree('VP', [(u'had', u'VBD'), (u'operating', u'VBG')]),
ImmutableTree('VP', [(u'said', u'VBD'), (u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'would', u'MD'), (u'still', u'RB'), (u'get', u'VB'), (u'about', u'RB'), (u'$', u'$')]),
ImmutableTree('VP', [(u'offset', u'VB')]),
ImmutableTree('VP', [(u'act', u'VB')]),
ImmutableTree('VP', [(u'agreed', u'VBD')]),
ImmutableTree('VP', [(u'stronger', u'JJR')]),
ImmutableTree('VP', [(u'increased', u'VBD'), (u'its', u'PRP$')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'improve', u'VB'), (u'-LCB-', u'(')]),
ImmutableTree('VP', [(u'poor', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC')]),
ImmutableTree('VP', [(u',', u','), (u'or', u'CC'), (u'$', u'$')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'what', u'WP'), (u'happened', u'VBD')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'real', u'JJ')]),
ImmutableTree('VP', [(u'just', u'RB'), (u'$', u'$')]),
ImmutableTree('VP', [(u'left', u'VBD'), (u'his', u'PRP$'), (u'last', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'outstanding', u'JJ')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'has', u'VBZ')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'reviewing', u'VBG'), (u'its', u'PRP$')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'its', u'PRP$')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'60%-held', u'JJ')]),
ImmutableTree('VP', [(u'International', u'JJ')]),
ImmutableTree('VP', [(u'reap', u'VB')]),
ImmutableTree('VP', [(u'pursue', u'VB'), (u'other', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'who', u'WP'), (u'resigned', u'VBD')]),
ImmutableTree('VP', [(u',', u','), (u'such', u'JJ')]),
ImmutableTree('VP', [(u'Even', u'RB')]),
ImmutableTree('VP', [(u'scaled-back', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u','), (u'medical', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'which', u'WDT'), (u'includes', u'VBZ'), (u'such', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'when', u'WRB')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'combined', u'VBN')]),
ImmutableTree('VP', [(u'that', u'WDT'), (u'were', u'VBD')]),
ImmutableTree('VP', [(u'more', u'JJR'), (u'will', u'MD'), (u'be', u'VB'), (u'done', u'VBN'), (u',', u','), (u"''", u"''")]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'Airlines', u'NNPS')]),
ImmutableTree('VP', [(u'more', u'JJR')]),
ImmutableTree('VP', [(u"'s", u'POS'), (u'new', u'JJ'), (u'cholesterol-lowering', u'JJ')]),
ImmutableTree('VP', [(u'strong', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'world-wide', u'JJ')]),
ImmutableTree('VP', [(u'leading', u'VBG')]),
ImmutableTree('VP', [(u',', u','), (u'based', u'VBN')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u',', u','), (u'or', u'CC')]),
ImmutableTree('VP', [(u'expected', u'VBN')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'respectively', u'RB'), (u'.', u'.')]),
ImmutableTree('VP', [(u'agreed', u'VBD')]),
ImmutableTree('VP', [(u'could', u'MD'), (u'be', u'VB'), (u'bumped', u'VBN'), (u'back', u'RB')]),
ImmutableTree('VP', [(u'what', u'WP')]),
ImmutableTree('VP', [(u'stash', u'VB'), (u'away', u'RB')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'World-wide', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'per-share', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.'), (u"''", u"''")]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'has', u'VBZ'), (u'lagged', u'VBN'), (u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'away', u'RB')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC'), (u'$', u'$')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'other', u'JJ')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'Facilities', u'NNPS'), (u'closed', u'VBD')]),
ImmutableTree('VP', [(u'comment', u'VB'), (u'.', u'.')]),
ImmutableTree('VP', [(u'combined', u'VBN'), (u'$', u'$')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u"'s", u'POS'), (u'world-wide', u'JJ'), (u'third-quarter', u'JJ')]),
ImmutableTree('VP', [(u'same', u'JJ')]),
ImmutableTree('VP', [(u'&', u'CC')]),
ImmutableTree('VP', [(u'strong', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'is', u'VBZ'), (u'newly', u'RB'), (u'created', u'VBN'), (u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS'), (u'domestic', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'so', u'RB'), (u'did', u'VBD')]),
ImmutableTree('VP', [(u'Facilities', u'NNPS'), (u'also', u'RB'), (u'said', u'VBD')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'other', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'unchanged', u'JJ'), (u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS'), (u'net', u'JJ')]),
ImmutableTree('VP', [(u'last', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'composite', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'expected', u'VBN')]),
ImmutableTree('VP', [(u'so', u'RB')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u';', u':')]),
ImmutableTree('VP', [(u'was', u'VBD'), (u'good', u'JJ')]),
ImmutableTree('VP', [(u'special', u'JJ')]),
ImmutableTree('VP', [(u'weak', u'JJ')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'real', u'JJ')]),
ImmutableTree('VP', [(u"'s", u'VBZ'), (u'not', u'RB'), (u'mediocre', u'JJ'), (u',', u',')]),
ImmutableTree('VP', [(u'&', u'CC')]),
ImmutableTree('VP', [(u',', u','), (u'angering', u'VBG')]),
ImmutableTree('VP', [(u'invest', u'VB')]),
ImmutableTree('VP', [(u'Nine-month', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'cardiovascular', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC'), (u'its', u'PRP$'), (u'chief', u'JJ'), (u'financial', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'abroad', u'RB'), (u'.', u'.')]),
ImmutableTree('VP', [(u'&', u'CC')]),
ImmutableTree('VP', [(u'fired', u'VBD'), (u'and', u'CC')]),
ImmutableTree('VP', [(u',', u','), (u'raising', u'VBG'), (u'its', u'PRP$'), (u'total', u'JJ')]),
ImmutableTree('VP', [(u'said', u'VBD'), (u'.', u'.')]),
ImmutableTree('VP', [(u'existing', u'VBG')]),
ImmutableTree('VP', [(u"'s", u'POS'), (u'established', u'VBN')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC'), (u'senior', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'national', u'JJ'), (u'over-the-counter', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'Facilities', u'NNPS'), (u'said', u'VBD'), (u',', u','), (u'but', u'CC'), (u'wo', u'MD'), (u"n't", u'RB'), (u'serve', u'VB')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u',', u','), (u'including', u'VBG')]),
ImmutableTree('VP', [(u'saw', u'VBD'), (u'their', u'PRP$'), (u'prospective', u'JJ'), (u'personal', u'JJ')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'has', u'VBZ')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'his', u'PRP$'), (u'previous', u'JJ')]),
ImmutableTree('VP', [(u'increase', u'VB'), (u'more', u'JJR')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u"'s", u'POS'), (u'promotional', u'JJ')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'$', u'$')]),
ImmutableTree('VP', [(u'plummet', u'VB')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'&', u'CC')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'segment', u'VBP'), (u'.', u'.')]),
ImmutableTree('VP', [(u'``', u'``')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'Facilities', u'NNPS'), (u'said', u'VBD')]),
ImmutableTree('VP', [(u',', u','), (u'down', u'RB')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'world-wide', u'JJ'), (u'agricultural', u'JJ')]),
ImmutableTree('VP', [(u'Intense', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'reflecting', u'VBG')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'$', u'$')]),
ImmutableTree('VP', [(u"'", u'POS')]),
ImmutableTree('VP', [(u'increased', u'VBN')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'led', u'VBN')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'personal-care', u'JJ')]),
ImmutableTree('VP', [(u'also', u'RB'), (u'had', u'VBD'), (u'strong', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'composite', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'per-share', u'JJ')]),
ImmutableTree('VP', [(u'Third-quarter', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'real', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'Airlines', u'NNPS'), (u',', u','), (u'and', u'CC')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'year-before', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'other', u'JJ')]),
ImmutableTree('VP', [(u'well', u'RB'), (u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'``', u'``'), (u'There', u'EX'), (u'was', u'VBD')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'or', u'CC'), (u'more', u'RBR'), (u'per-share', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'was', u'VBD'), (u'named', u'VBN')]),
ImmutableTree('VP', [(u'pace', u'VB')]),
ImmutableTree('VP', [(u'about', u'RB'), (u'$', u'$')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'continued', u'VBD')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'Confectionery', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'total', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'its', u'PRP$'), (u'common', u'JJ')]),
ImmutableTree('VP', [(u'Conceivably', u'RB'), (u',', u',')]),
ImmutableTree('VP', [(u',', u','), (u'and', u'CC')]),
ImmutableTree('VP', [(u'per-share', u'JJ')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'concern', u'VBP'), (u'.', u'.')]),
ImmutableTree('VP', [(u'its', u'PRP$'), (u'highly', u'RB'), (u'profitable', u'JJ')]),
ImmutableTree('VP', [(u'bank', u'VBP'), (u'had', u'VBD')]),
ImmutableTree('VP', [(u'buy', u'VB')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'Facilities', u'NNPS')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'make', u'VB')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'and', u'CC'), (u'focusing', u'VBG'), (u'more', u'RBR'), (u'carefully', u'RB')]),
ImmutableTree('VP', [(u'concern', u'VBP'), (u'.', u'.')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'had', u'VBD'), (u'bought', u'VBN'), (u'back', u'RB')]),
ImmutableTree('VP', [(u'Products', u'NNPS')]),
ImmutableTree('VP', [(u'&', u'CC')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'wanted', u'VBD'), (u'his', u'PRP$')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'close', u'VB')]),
ImmutableTree('VP', [(u'third-quarter', u'JJ'), (u'and', u'CC'), (u'nine-month', u'JJ')]),
ImmutableTree('VP', [(u'Airlines', u'NNPS'), (u'and', u'CC')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'old', u'JJ'), (u',', u','), (u'has', u'VBZ')]),
ImmutableTree('VP', [(u'could', u'MD'), (u'say', u'VB'), (u'their', u'PRP$')]),
ImmutableTree('VP', [(u'old', u'JJ'), (u',', u','), (u'was', u'VBD'), (u'named', u'VBN')]),
ImmutableTree('VP', [(u',', u','), (u"''", u"''")]),
ImmutableTree('VP', [(u'were', u'VBD'), (u'flat', u'JJ')]),
ImmutableTree('VP', [(u'aft', u'JJ')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'real', u'JJ')]),
ImmutableTree('VP', [(u'old', u'JJ'), (u',', u','), (u'served', u'VBN')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'.', u'.'), (u'-RCB-', u')')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u','), (u'had', u'VBD')]),
ImmutableTree('VP', [(u'are', u'VBP'), (u'prepaying', u'VBG'), (u'their', u'PRP$')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'only', u'RB'), (u'big', u'JJ')]),
ImmutableTree('VP', [(u'$', u'$'), (u'300-a-share', u'JJ')]),
ImmutableTree('VP', [(u',', u','), (u'blending', u'VBG')]),
ImmutableTree('VP', [(u'added', u'VBD'), (u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'lead', u'VB')]),
ImmutableTree('VP', [(u'was', u'VBD'), (u'mediocre', u'JJ'), (u',', u','), (u'but', u'CC'), (u'great', u'JJ'), (u'everywhere', u'RB'), (u'else', u'RB'), (u',', u','), (u'that', u'WDT'), (u'would', u'MD'), (u'be', u'VB'), (u'fine', u'JJ'), (u',', u','), (u"''", u"''"), (u'says', u'VBZ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u';', u':')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'World-wide', u'JJ')]),
ImmutableTree('VP', [(u'and', u'CC')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'increased', u'VBN')]),
ImmutableTree('VP', [(u'strong', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'filling', u'VBG')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u','), (u'but', u'CC'), (u'just', u'RB'), (u'$', u'$')]),
ImmutableTree('VP', [(u'earlier', u'RBR'), (u',', u',')]),
ImmutableTree('VP', [(u'earlier', u'RBR'), (u'.', u'.')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'His', u'PRP$')]),
ImmutableTree('VP', [(u',', u','), (u'based', u'VBN')]),
ImmutableTree('VP', [(u'only', u'RB')]),
ImmutableTree('VP', [(u'had', u'VBD'), (u'been', u'VBN'), (u'executive', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u';', u':')]),
ImmutableTree('VP', [(u'``', u'``')]),
ImmutableTree('VP', [(u'eighth', u'JJ')]),
ImmutableTree('VP', [(u'.', u'.')]),
ImmutableTree('VP', [(u'continuing', u'VBG')]),
ImmutableTree('VP', [(u'that', u'WDT'), (u'provide', u'VBP'), (u'hefty', u'JJ')]),
ImmutableTree('VP', [(u'fuel', u'VB')]),
ImmutableTree('VP', [(u'stood', u'VBD')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u',', u','), (u'citing', u'VBG'), (u'depressed', u'JJ')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'world-wide', u'JJ')]),
ImmutableTree('VP', [(u'least', u'JJS')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u'operating', u'VBG')]),
ImmutableTree('VP', [(u',', u',')]),
ImmutableTree('VP', [(u"'s", u'POS')]),
ImmutableTree('VP', [(u'$', u'$')]),
ImmutableTree('VP', [(u'supply', u'VB')]),
ImmutableTree('VP', [(u"'s", u'POS')])]
In [30]:
# misses a lot of single-word VP
# incorrectly adds a lot of adjectives, POS and punctuation
In [31]:
# c)
# does slightly better than the baseline chunker (but probably not good enough to be used productively as it is)
In [ ]:
Content source: JuliaNeumann/nltk_book_exercises
Similar notebooks: