In [2]:
import pandas as pd
import numpy as np
%matplotlib inline

In [3]:
from pattern.en import parsetree

In [4]:
# Load the code/description table from vcodes.csv (path is relative to the
# notebook's working directory).
vcodes_df = pd.read_csv("vcodes.csv")

In [5]:
# Preview the first rows of the freshly loaded table.
vcodes_df.head()


Out[5]:
code description
0 V010 Pedestrian injured in collision with pedal cyc...
1 V011 Pedestrian injured in collision with pedal cyc...
2 V019 Pedestrian injured in collision with pedal cyc...
3 V020 Pedestrian injured in collision with two- or t...
4 V021 Pedestrian injured in collision with two- or t...

In [6]:
# Run pattern.en's parsetree() over every description and keep the result in a
# new 'parsetree' column. Note: this adds the column to vcodes_df in place.
vcodes_df['parsetree'] = vcodes_df['description'].map(parsetree)

In [7]:
# Preview the table again to confirm the 'parsetree' column was added.
vcodes_df.head()


Out[7]:
code description parsetree
0 V010 Pedestrian injured in collision with pedal cyc... [(Pedestrian, injured, in, collision, with, pe...
1 V011 Pedestrian injured in collision with pedal cyc... [(Pedestrian, injured, in, collision, with, pe...
2 V019 Pedestrian injured in collision with pedal cyc... [(Pedestrian, injured, in, collision, with, pe...
3 V020 Pedestrian injured in collision with two- or t... [(Pedestrian, injured, in, collision, with, tw...
4 V021 Pedestrian injured in collision with two- or t... [(Pedestrian, injured, in, collision, with, tw...

In [8]:
# Grab the parse tree of the row labelled 118 for closer inspection.
# `.ix` is deprecated and was removed in pandas 1.0; `.loc` with an explicit
# column name selects the same cell (column position 2 is 'parsetree').
ptree = vcodes_df.loc[118, 'parsetree']

In [9]:
# Show the raw description text of row 118.
# `.ix` is deprecated and was removed in pandas 1.0; `.loc` with an explicit
# column name selects the same cell (column position 1 is 'description').
vcodes_df.loc[118, 'description']


Out[9]:
'Motorcycle rider injured in collision with car, pick-up truck or van: Unspecified motorcycle rider injured in nontraffic accident'

In [10]:
# Print every chunk of every sentence in the parse tree held in `ptree`.
# print(...) with a single argument behaves identically on Python 2 and
# Python 3, whereas the bare `print chunk` statement is a SyntaxError on 3.
for sentence in ptree:
    for chunk in sentence.chunks:
        print(chunk)


Chunk('Motorcycle rider/NP')
Chunk('injured/VP')
Chunk('in/PP')
Chunk('collision/NP')
Chunk('with/PP')
Chunk('car/NP')
Chunk('pick-up truck/NP')
Chunk('van/NP')
Chunk('Unspecified motorcycle rider/NP')
Chunk('injured/VP')
Chunk('in/PP')
Chunk('nontraffic accident/NP')

In [12]:
# Rebind `ptree` to the parse tree of the row labelled 377.
# `.ix` is deprecated and was removed in pandas 1.0; `.loc` with an explicit
# column name selects the same cell (column position 2 is 'parsetree').
ptree = vcodes_df.loc[377, 'parsetree']

In [14]:
# Print every chunk of every sentence in the parse tree held in `ptree`.
# print(...) with a single argument behaves identically on Python 2 and
# Python 3, whereas the bare `print chunk` statement is a SyntaxError on 3.
for sentence in ptree:
    for chunk in sentence.chunks:
        print(chunk)


Chunk('Occupant/NP')
Chunk('of/PP')
Chunk('pick-up truck/NP')
Chunk('van injured/VP')
Chunk('in/PP')
Chunk('collision/NP')
Chunk('with/PP')
Chunk('car/NP')
Chunk('pick-up truck/NP')
Chunk('van/NP')
Chunk('Unspecified/VP')
Chunk('occupant/NP')
Chunk('of/PP')
Chunk('pick-up truck/NP')
Chunk('van injured/VP')
Chunk('in/PP')
Chunk('nontraffic accident/NP')

In [16]:
# Show the raw description text of row 377.
# `.ix` is deprecated and was removed in pandas 1.0; `.loc` with an explicit
# column name selects the same cell (column position 1 is 'description').
vcodes_df.loc[377, 'description']


Out[16]:
'Occupant of pick-up truck or van injured in collision with car, pick-up truck or van: Unspecified occupant of pick-up truck or van injured in nontraffic accident'

In [23]:
# Print every chunk of every sentence in the parse tree of row 378.
# `.ix` is deprecated and was removed in pandas 1.0, so the cell is selected
# with `.loc` and an explicit column name (column position 2 is 'parsetree').
# print(...) with a single argument behaves identically on Python 2 and
# Python 3, whereas the bare `print chunk` statement is a SyntaxError on 3.
for sentence in vcodes_df.loc[378, 'parsetree']:
    for chunk in sentence.chunks:
        print(chunk)


Chunk('Occupant/NP')
Chunk('of/PP')
Chunk('pick-up truck/NP')
Chunk('van injured/VP')
Chunk('in/PP')
Chunk('collision/NP')
Chunk('with/PP')
Chunk('car/NP')
Chunk('pick-up truck/NP')
Chunk('van/NP')
Chunk('Person/NP')
Chunk('injured/VP')
Chunk('while/PP')
Chunk('boarding/VP')
Chunk('alighting/ADJP')

In [24]:
# Show the raw description text of row 378.
# `.ix` is deprecated and was removed in pandas 1.0; `.loc` with an explicit
# column name selects the same cell (column position 1 is 'description').
vcodes_df.loc[378, 'description']


Out[24]:
'Occupant of pick-up truck or van injured in collision with car, pick-up truck or van: Person injured while boarding or alighting'

In [26]:
# Take the first sentence of the parse tree currently bound to `ptree`.
# NOTE(review): this relies on `ptree` from an earlier cell; its most recent
# assignment in this notebook is the row-377 lookup, and execution counts are
# non-sequential, so confirm the intended row when re-running from scratch.
sent = ptree[0]

In [30]:
# For each parse tree, count the subjects of its first sentence only, then
# list the distinct counts seen across the whole column.
(
    vcodes_df['parsetree']
    .map(lambda tree: len(tree[0].subjects))
    .unique()
)


Out[30]:
array([0])

In [47]:
# Pick the third chunk (index 2) of the sentence held in `sent`.
pu_truck = sent.chunks[2]

In [48]:
# Display the chunk's text via its `.string` attribute.
pu_truck.string


Out[48]:
u'pick-up truck'