In [1]:
import spacy

In [2]:
# Load the small English pipeline by its full package name.
# The bare 'en' shortcut only resolves on spaCy 2.x installs that created a
# shortcut link; spaCy 3 removed shortcut links, so the explicit name is the
# portable spelling. Install once with: python -m spacy download en_core_web_sm
nlp = spacy.load('en_core_web_sm')

Basic Parsing


In [3]:
# Tokens: run the pipeline over a short sample sentence, then show each token
# on its own line (punctuation comes out as separate tokens).
doc = nlp(u'Hello Rishu, this is a test nlp document.')

print(*doc, sep="\n")


Hello
Rishu
,
this
is
a
test
nlp
document
.

In [4]:
# Parts of speech: for every token show its text, the integer part-of-speech
# id (`pos`) and the readable label for the same tag (`pos_`).
column_break = " | "

for token in doc:
    numeric_tag = token.pos
    label = token.pos_
    print(token, column_break, numeric_tag, column_break, label, column_break)


Hello  |  91  |  INTJ  | 
Rishu  |  96  |  PROPN  | 
,  |  97  |  PUNCT  | 
this  |  90  |  DET  | 
is  |  87  |  AUX  | 
a  |  90  |  DET  | 
test  |  92  |  NOUN  | 
nlp  |  92  |  NOUN  | 
document  |  92  |  NOUN  | 
.  |  97  |  PUNCT  | 

In [5]:
# Identifying sentences: iterating over `doc2.sents` walks the text one
# detected sentence at a time.
doc2 = nlp(u"This is a sentence. This is the first sentense. This is the third sentence")

print(*doc2.sents, sep="\n")


This is a sentence.
This is the first sentense.
This is the third sentence

In [6]:
# Integer indexing on the parsed document picks out a single token; position 5
# is the "This" that opens the second sentence.
doc2[5]


Out[6]:
This

In [7]:
# Confirm what kind of object integer indexing hands back (a spaCy Token).
type(doc2[5])


Out[7]:
spacy.tokens.token.Token

In [8]:
# Token 5 begins the second sentence, so `is_sent_start` reports True for it.
doc2[5].is_sent_start


Out[8]:
True

In [9]:
# Token 8 sits inside the second sentence rather than at its start. In this
# run the attribute evaluated to None, which is why the cell shows no Out[]
# value. NOTE(review): other spaCy versions may report False here instead.
doc2[8].is_sent_start

In [ ]:

Tokenisation


In [10]:
# Sample text that mixes a distance with its unit, a place name and a
# currency amount, used to look at how such strings are split into tokens.
fare_sentence = u"A 5km ride to London zone 1 cost £20.00"
doc3 = nlp(fare_sentence)

In [11]:
# Print the raw text of every token, one per line. Note that "5km" is split
# into "5" and "km", and "£20.00" into "£" and "20.00".
print(*(token.text for token in doc3), sep="\n")


A
5
km
ride
to
London
zone
1
cost
£
20.00

In [13]:
# Parse the sample sentence again so it can be handed to the visualiser.
doc4 = nlp(u"A 5km ride to London zone 1 cost £20.00")

# spaCy's visualiser ships inside the library as the `displacy` submodule.
# There is no standalone `dispacy` package, which is why the earlier
# `import dispacy` raised ModuleNotFoundError.
from spacy import displacy

In [ ]: