In [1]:
from htmresearch.encoders.cio_encoder import CioEncoder
In [2]:
%matplotlib inline
In [3]:
import matplotlib.pyplot as plt
In [4]:
import matplotlib.image as mpimg
In [5]:
import numpy
In [6]:
# Shared encoder instance used by plotText() and compareText();
# constructed with the "en_synonymous" retina.
encoder = CioEncoder(retina="en_synonymous")
In [7]:
def plotText(text):
    """Encode `text` and display its fingerprint as a 128x128 bitmap.

    The module-level `encoder` produces a dict whose
    ['fingerprint']['positions'] entry lists the indices of the active
    bits; those indices are set to 1 in a flat 128*128 array which is then
    reshaped to 128x128 and shown with matplotlib. The reported sparsity
    and the number of active bits are printed as a one-line summary.

    Args:
        text: the string to encode.
    """
    encoding = encoder.encode(text)
    positions = list(encoding['fingerprint']['positions'])

    img = numpy.zeros(128 * 128)
    img[positions] = 1
    img = img.reshape((128, 128))

    # "spectral" was a deprecated alias that newer matplotlib releases
    # removed; "nipy_spectral" is the same colormap under its current name.
    plt.imshow(img, cmap="nipy_spectral")

    # Single pre-formatted argument so the line prints identically whether
    # `print` is the Python 2 statement or the Python 3 function.
    print("Sparsity= %s bits= %s" % (encoding['sparsity'], len(positions)))
In [8]:
def compareText(text1, text2):
    """Encode two texts and report how many fingerprint bits they share.

    Both texts are encoded with the module-level `encoder`; the overlap is
    the size of the intersection of their ['fingerprint']['positions']
    index collections. The bit count of each fingerprint and the overlap
    are printed.

    Args:
        text1: first string to encode.
        text2: second string to encode.

    Returns:
        The number of positions present in both fingerprints.
    """
    positions1 = encoder.encode(text1)['fingerprint']['positions']
    positions2 = encoder.encode(text2)['fingerprint']['positions']

    # Each message is built as one string so the output is the same under
    # the Python 2 print statement and the Python 3 print function (the
    # double space after "=" reproduces the original comma-separated form).
    print("Bits for '%s' =  %s" % (text1, len(positions1)))
    print("Bits for '%s' =  %s" % (text2, len(positions2)))

    overlap = len(set(positions1) & set(positions2))
    print("Overlap= %s" % overlap)
    return overlap
In [9]:
# Fingerprint of the baseline sentence that the comparison cells below reuse.
plotText("James quit smoking but it was not an easy decision.")
In [10]:
# Baseline vs. a close paraphrase: "quit" -> "stopped", with "an" and the
# final period dropped.
compareText("James quit smoking but it was not an easy decision.",
"James stopped smoking but it was not easy decision")
Out[10]:
In [11]:
# Baseline vs. paraphrase: "quit" -> "stopped", "an easy" -> "a simple".
compareText("James quit smoking but it was not an easy decision.",
"James stopped smoking but it was not a simple decision")
Out[11]:
In [12]:
# Baseline vs. paraphrase: "quit" -> "stopped", "an easy decision" -> "a simple choice".
compareText("James quit smoking but it was not an easy decision.",
"James stopped smoking but it was not a simple choice")
Out[12]:
In [13]:
# Same paraphrase as the "simple choice" variant, with the subject changed
# from "James" to "Sue".
compareText("James quit smoking but it was not an easy decision.",
"Sue stopped smoking but it was not a simple choice")
Out[13]:
In [14]:
# Baseline vs. a paraphrase that changes the subject ("Sue") and replaces
# "smoking" with "cigarettes".
compareText("James quit smoking but it was not an easy decision.",
"Sue stopped cigarettes but it was not a simple choice")
Out[14]:
In [15]:
# Baseline vs. a sentence that shares no words with it.
compareText("James quit smoking but it was not an easy decision.",
"We start with ten base sentences each with ten words")
Out[15]:
In [16]:
# Baseline vs. a sentence that shares the word "smoking" but describes
# continuing to smoke rather than quitting.
compareText("James quit smoking but it was not an easy decision.",
"Sue keeps smoking a lot of cigarettes")
Out[16]:
In [17]:
# Fingerprint of a jumble of words rather than a meaningful sentence.
plotText("the ski lift of projectors requires random resorts vacation chairs kids")
In [18]:
# Fingerprint of a single word.
plotText("biology")
In [19]:
# Overlap between a two-word phrase and a single word.
compareText("mountains vacation","skiing")
Out[19]:
In [20]:
# Paragraph-length inputs: a passage about James quitting cigarettes vs. a
# passage about second hand smoke.
compareText("James loved to puff on his cigarettes. However he recently read a bunch of articles describing their unhealthy effects. James decided to quit cigarettes completely. It clearly was not an easy decision, but he felt it was the right one.",
"The tobacco industry has long tried to hide the effects of second hand smoke. However there is now a preponderance of evidence demonstrating its ill effects. Second hand smoke is clearly dangerous to anyone who breathes it in.")
Out[20]:
In [21]:
# Paragraph-length inputs: the passage about James quitting cigarettes vs. a
# passage about cigars and lung cancer.
compareText("James loved to puff on his cigarettes. However he recently read a bunch of articles describing their unhealthy effects. James decided to quit cigarettes completely. It clearly was not an easy decision, but he felt it was the right one.",
"the dangers of cigars. a cigar is basically a cancer stick. Cigars are known to contain over 7000 chemicals, at least 250 of which are directly tied to lung cancer.")
Out[21]:
In [22]:
# Paragraph-length inputs: the passage about James quitting cigarettes vs. a
# passage about jogging and health, which does not mention smoking.
compareText("James loved to puff on his cigarettes. However he recently read a bunch of articles describing their unhealthy effects. James decided to quit cigarettes completely. It clearly was not an easy decision, but he felt it was the right one.",
"people with health problems are turning to jogging as a solution. It raises heart rate and causes a rush of endorphins to the body. It also makes people more resilient to disease. It's the ultimate preventative medicine for health issues.")
Out[22]:
In [23]:
# Overlap between two single words.
compareText("James","tobacco")
Out[23]:
In [24]:
# Fingerprint of a short string that is not an English word.
plotText("advsh")
In [ ]: