WordNet Similarity


In [8]:
from nltk.corpus import wordnet as wn
from itertools import product

# The two words to compare, followed by every WordNet synset found for each.
wordx = "phone"
wordy = "tablet"
sem1 = wn.synsets(wordx)
sem2 = wn.synsets(wordy)

In [9]:
# Display the synsets WordNet returned for wordx.
sem1


Out[9]:
[Synset('telephone.n.01'),
 Synset('phone.n.02'),
 Synset('earphone.n.01'),
 Synset('call.v.03')]

In [10]:
# Display the synsets WordNet returned for wordy.
sem2


Out[10]:
[Synset('tablet.n.01'),
 Synset('pad.n.01'),
 Synset('tablet.n.03'),
 Synset('pill.n.02')]

In [11]:
# Every (synset of wordx, synset of wordy) pairing, stored as a list so it
# can be indexed and iterated more than once.
prod = list(product(sem1, sem2))

In [12]:
# Spot check: Wu-Palmer similarity of the second pairing only.
syn_x, syn_y = prod[1]
syn_x.wup_similarity(syn_y)


Out[12]:
0.25

In [13]:
# Best Wu-Palmer similarity over all synset pairings; stays 0.0 when no
# pairing yields a score.
maxscore = 0.0
for i, j in prod:
    score = i.wup_similarity(j)
    if score is None:
        # No score was returned for this pairing, so it cannot raise the maximum.
        continue
    maxscore = max(maxscore, score)

In [15]:
# Build the message from wordx / wordy so it stays correct when the compared
# words are changed, instead of repeating them as literals.
print("Similarity between {} and {} is : {}".format(wordx, wordy, maxscore))


Similarity between phone and tablet is : 0.5882352941176471

Contextual Similarity


In [3]:
import nltk

# Wrap the lower-cased Brown corpus tokens in an nltk.Text, then print the
# words it reports as similar to 'woman'.
text = nltk.Text(map(str.lower, nltk.corpus.brown.words()))
text.similar('woman')


man time day year car moment world house family child country boy
state job place way war girl work word

In [4]:
# NOTE(review): _word_context_index is an underscore-prefixed attribute of
# nltk.Text, i.e. not public API; it is used here to get the words back as a
# value rather than only printed.
similar_words = text._word_context_index.similar_words('woman')
print(*similar_words)


man number time world fact end year state house way day use part kind boy matter problem result girl group

In [5]:
# Contexts that 'cat' and 'dog' both occur in, with counts. Goes through the
# underscore-prefixed (non-public) index to get the result back as a value.
text._word_context_index.common_contexts(['cat', 'dog'])


Out[5]:
FreqDist({('a', 'a'): 2, ('a', 'he'): 2, ('the', 'and'): 2})

In [6]:
# Number of entries in the shared-context result for 'brown' and 'red'.
len(text._word_context_index.common_contexts(['brown', 'red']))


Out[6]:
3

In [7]:
# Display the Text object's repr.
text


Out[7]:
<Text: the fulton county grand jury said friday an...>

In [17]:
# Scratch dict with a single entry: key "hi" mapped to value "there".
x = {"hi": "there"}

In [18]:
# Display the dict's current contents.
x


Out[18]:
{'hi': 'there'}

In [20]:
# `in` on a dict tests its keys, not its values: "there" is only a value of x,
# so this evaluates to False.
"there" in x


Out[20]:
False

In [ ]: