In [1]:
# https://spacy.io/usage/linguistic-features
In [1]:
import spacy
# Load the German pipeline by its full package name. The bare 'de' shortcut
# link is no longer resolved by spaCy v3, while the package name works
# wherever the model package is installed.
nlp = spacy.load('de_core_news_sm')
In [36]:
# Sample sentence analysed by the cells below.
# Spelling fixed: "Milliarden" (was "Millarden"), so the pipeline sees a real
# German word instead of an out-of-vocabulary typo.
text = 'Apple erwägt ein Startup in UK für zehn Milliarden Dollar zu kaufen.'
In [37]:
# Run the loaded pipeline on the sample sentence; the resulting `doc` is what
# the following cells iterate over and visualise.
doc = nlp(text)
In [38]:
# Show the type of the object returned by the pipeline call.
type(doc)
Out[38]:
In [39]:
# Print one line of annotations per token in `doc`.
# Background: https://spacy.io/usage/linguistic-features
# All token attributes: https://spacy.io/api/token#attributes
for token in doc:
    annotations = (
        token.text,
        token.lemma_,
        token.pos_,
        token.tag_,
        token.dep_,
        token.shape_,
        token.is_alpha,
        token.is_stop,
    )
    # Unpack so the values are space-separated, exactly like a multi-arg print.
    print(*annotations)
In [40]:
# Look up spaCy's glossary description for the tag 'ADP'.
spacy.explain('ADP')
Out[40]:
In [41]:
# Print the similarity of every token in `doc` against each token tagged NOUN.
# Model card: https://spacy.io/models/de#de_core_news_sm
# Background: https://spacy.io/usage/spacy-101#vectors-similarity
# NOTE(review): if the loaded pipeline is the small model linked above, it
# presumably ships without static word vectors, so these scores may not be
# meaningful - confirm against the model card.
nouns = [candidate for candidate in doc if candidate.pos_ == 'NOUN']
for token in doc:
    for noun in nouns:
        print(token.text, noun.text, token.similarity(noun))
In [42]:
from spacy import displacy
# https://spacy.io/usage/visualizers
In [43]:
# Render `doc` with displaCy's 'dep' style; jupyter=True requests inline
# notebook output.
displacy.render(doc, style='dep', jupyter=True)
# Disabled alternative, kept for reference:
# displacy.serve(doc, style='dep')
In [44]:
# Glossary lookup for the label 'sb' (presumably a dependency label from the
# parse rendered above - confirm).
spacy.explain('sb')
Out[44]:
In [45]:
# Glossary lookup for the label 'oc' (presumably a dependency label - confirm).
spacy.explain('oc')
Out[45]:
In [26]:
# Glossary lookup for the label 'oa' (presumably a dependency label - confirm).
spacy.explain('oa')
Out[26]:
In [27]:
# Glossary lookup for the label 'nk' (presumably a dependency label - confirm).
spacy.explain('nk')
Out[27]:
In [28]:
# Glossary lookup for the label 'mnr' (presumably a dependency label - confirm).
spacy.explain('mnr')
Out[28]:
In [46]:
# Glossary lookup for the label 'pm' (presumably a dependency label - confirm).
spacy.explain('pm')
Out[46]:
In [47]:
# Glossary lookup for the label 'mo' (presumably a dependency label - confirm).
spacy.explain('mo')
Out[47]:
In [48]:
# Glossary lookup for the tag 'PROPN' (upper-case like the `pos_` value 'NOUN'
# used earlier, so presumably a coarse part-of-speech tag - confirm).
spacy.explain('PROPN')
Out[48]:
In [49]:
# Glossary lookup for the tag 'ADP'; repeats the identical lookup made in an
# earlier cell of this notebook.
spacy.explain('ADP')
Out[49]:
In [50]:
# Glossary lookup for the tag 'PART' (presumably a coarse part-of-speech tag -
# confirm).
spacy.explain('PART')
Out[50]:
In [51]:
# For each noun chunk of `doc`, print the chunk text, the text of its root
# token, the root's dependency label, and the text of the root's head token.
for chunk in doc.noun_chunks:
    root = chunk.root
    print(chunk.text, root.text, root.dep_, root.head.text)
In [52]:
# For each token, print its text and dependency label, then the text and
# `pos_` of its head, and finally the list of its child tokens.
for token in doc:
    head = token.head
    children = list(token.children)
    print(token.text, token.dep_, head.text, head.pos_, children)
In [53]:
# Render `doc` with displaCy's 'ent' style; jupyter=True requests inline
# notebook output.
displacy.render(doc, style='ent', jupyter=True)
In [ ]: