In [10]:
import csv

import pandas as pd

# Load the chunking data as a three-column, space-separated table.
df = pd.read_csv(
    '../data/conll2000/test.txt',
    names=['token', 'pos', 'chunk'],
    sep=' ',
    # Keep blank lines as all-NaN rows instead of skipping them, so they stay
    # visible to later processing as row separators.
    skip_blank_lines=False,
    # Treat only a truly empty field as missing. With pandas' default NA list,
    # literal tokens such as "NA", "null" or "nan" would be parsed as NaN and
    # then be indistinguishable from genuinely missing values.
    keep_default_na=False,
    na_values=[''],
    # Fields are raw text: a double-quote character must be read literally
    # rather than opening a quoted field that could span several lines.
    quoting=csv.QUOTE_NONE,
)

In [11]:
# Rows in which every column is null (the blank lines kept by the loader) act
# as separators: the running count of separators seen so far becomes the id of
# the group of rows that follows, stored in a new `sent_id` column.
#
# The guard makes the cell safe to re-run. Once the separator rows have been
# dropped, recomputing the running count would find no separators and would
# overwrite every existing `sent_id` with 0.
if "sent_id" not in df.columns:
    # Row-wise null check; equivalent to `df.T.isnull().all()` without
    # transposing the whole frame.
    is_separator = df.isnull().all(axis=1)
    # Attach the id first, then drop every row that still contains a null
    # (the separator rows themselves), producing a new frame rather than
    # mutating in place.
    df = df.assign(sent_id=is_separator.cumsum()).dropna(axis=0)

In [12]:



Out[12]:
token pos chunk sent_id
20 structural JJ B-NP 0
21 parts NNS I-NP 0
22 for IN B-PP 0
23 Boeing NNP B-NP 0
24 's POS B-NP 0
25 747 CD I-NP 0
26 jetliners NNS I-NP 0
27 . . O 0
29 Rockwell NNP B-NP 1
30 said VBD B-VP 1
31 the DT B-NP 1
32 agreement NN I-NP 1
33 calls VBZ B-VP 1
34 for IN B-SBAR 1
35 it PRP B-NP 1
36 to TO B-VP 1
37 supply VB I-VP 1
38 200 CD B-NP 1
39 additional JJ I-NP 1
40 so-called JJ I-NP 1

In [13]:
# Group the rows by their `sent_id` value. `df.apply()` with no function
# raises a TypeError, whereas the recorded output of this cell is a
# DataFrameGroupBy object, which is what `groupby` returns.
# NOTE(review): grouping key inferred from the `sent_id` column built earlier
# in the notebook — confirm this was the intended key.
df.groupby("sent_id")


Out[13]:
<pandas.core.groupby.DataFrameGroupBy object at 0x114927810>

In [ ]: