In [3]:
import nltk
from nltk.corpus import brown
from nltk.corpus import wordnet
from nltk.corpus import wordnet as w
In [4]:
# Sanity check: evaluate the Brown corpus word list to confirm that the NLTK
# corpus data has been downloaded (the cell output shows the result).
brown.words()
Out[4]:
In [11]:
# List every WordNet synset found for "plan", one per line, as
# "<synset>\t<definition>".
plan_synsets = wordnet.synsets('plan')
for synset in plan_synsets:
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    print("{}\t{}".format(synset, synset.definition()))
In [19]:
# Collect the synonyms and antonyms of "good" across all of its WordNet synsets.
synonyms = []
antonyms = []
for syn in wordnet.synsets("good"):
    for lemma in syn.lemmas():
        synonyms.append(lemma.name())
        # Look the antonyms up once and reuse the result.
        lemma_antonyms = lemma.antonyms()
        if lemma_antonyms:
            # Only the first antonym of each lemma is recorded.
            antonyms.append(lemma_antonyms[0].name())

# The lists may hold repeats (the same name can be appended for several
# synsets); set() collapses them for display.
print(set(synonyms))
# Blank separator line. A bare `print` only emits a newline on Python 2 and is
# a silent no-op on Python 3; print("") emits one on both.
print("")
print(set(antonyms))
In [24]:
# Show the lemmas of the first synset returned for "plan".
plan_synsets = wordnet.synsets('plan')
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(plan_synsets[0].lemmas())
In [85]:
""" Similarity between two words
Threshold decided manually, can be tweaked after discussion
"""
def similarity(a, b, very_similar=0.85, little_similar=0.65):
    """Print the WordNet similarity of two nouns together with a coarse verdict.

    Each word is looked up as its first noun sense (``<word>.n.01``), the two
    synsets are compared with ``wup_similarity``, and a single formatted line
    is printed. The function returns nothing.

    Args:
        a: First word, without any sense suffix (e.g. ``"ship"``).
        b: Second word, without any sense suffix.
        very_similar: Scores at or above this value are labelled
            "Very similar". The default was chosen by hand.
        little_similar: Scores at or above this value (but below
            ``very_similar``) are labelled "Little similar". The default was
            chosen by hand.

    Anything below ``little_similar``, or a missing score, is labelled
    "Not similar".

    NOTE(review): ``wordnet.synset`` presumably raises when a word has no
    ``.n.01`` sense; that error is not caught here -- confirm against the NLTK
    documentation.
    """
    sense_suffix = ".n.01"
    first = wordnet.synset(a + sense_suffix)
    second = wordnet.synset(b + sense_suffix)
    score = first.wup_similarity(second)

    if score is None:
        # No score available. Comparing or width-formatting None raises
        # TypeError on Python 3, so handle it explicitly and show "None".
        verdict = "Not similar"
        shown = "None"
    else:
        shown = score
        if score >= very_similar:
            verdict = "Very similar"
        elif score >= little_similar:
            verdict = "Little similar"
        else:
            verdict = "Not similar"

    # The words are printed exactly as given; slicing the suffixed name at its
    # first '.' would truncate any word that itself contains a dot.
    print('similarity({:>15}, {:15}) = {:<15} ==> {} '.format(a, b, shown, verdict))
In [92]:
# Short alias for the examples below.
sim = similarity

# print("") emits a blank separator line on both Python 2 and Python 3; a bare
# `print` only does so on Python 2 and is a silent no-op on Python 3.

# Pairs the author grouped as "very similar".
print("")
sim("sparrow", "parrot")
sim("ship", "boat")

# Pair the author grouped as "little similar".
print("")
sim("cat", "elephant")

# Pairs the author grouped as "not similar".
print("")
sim("dolphin", "ship")
sim("giraffe", "tiger")
sim("sheep", "ship")
sim("ship", "cat")
In [108]:
# A few more word pairs, run through the same similarity report in order.
s = similarity
for word_a, word_b in [
    ("cobra", "animal"),
    ("ocean", "river"),
    ("nose", "ear"),
    ("female", "male"),
    ("forest", "tree"),
    ("banana", "mango"),
    ("cobra", "snake"),
]:
    s(word_a, word_b)