In this notebook we played around with number of cold iterations.

``````

In [35]:

import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline
import random
from numpy.random import rand
from copy import copy
from __future__ import division
import time

``````

``````

In [36]:

with open(filename) as f:
X = X[:wordsnumber]
X = ''.join(X)
X = X.replace('\n', '')
return X

with open(filename) as f:
X = X.replace('\n', '')
return X

def chop_text_to_size(text, size):
return text[:1024*1024*size]

with open(filename) as f:
X = X.replace('\n', '')
return X

``````

counts

``````

In [37]:

def get_unicount(text):
length = len(text)
counts = np.zeros(26)
for i in xrange(length):
c = ord(text[i])
counts[c-97]+=1
#97-122
return counts

``````

bigram statistics

``````

In [ ]:

def get_bigram_stats_dic(text):
length = len(text)
dic = {}
for i in 'abcdefghijklmnopqrstuvwxyz':
for j in 'abcdefghijklmnopqrstuvwxyz':
dic[(i,j)]=0

for i in xrange(length-1):
if (text[i], text[i+1]) in dic:
dic[(text[i], text[i+1])] += 1

for k,v in dic.items():
dic[k] = v/(counts[ord(k[0])-97])
return dic

``````

quality

``````

In [68]:

def quality(decrypted, original):
l = len(decrypted)
zipped = zip(decrypted, original)
return sum(1.0 for x,y in zipped if x == y)/l

``````

crypt

``````

In [39]:

def crypt(text):
p = range(26)
random.shuffle(p)
output=''
for ch in text:
try:
x = ord(ch) - ord('a')
output+=(chr(p[x] + ord('a')))
except:
pass
return output, p

``````

unnormalized desiredPDF (likelihood)

``````

In [44]:

def get_desiredPDF_bigram(permutation):
logp = 0
for i in xrange(len(encrypted)-1):
pr = stats[chr(permutation[ord(encrypted[i])-97]+97),
chr(permutation[ord(encrypted[i+1])-97]+97)]
if pr>0:
logp += math.log(pr)
else:
logp += -9 #penalty for non existant pairs
return logp

``````

metropolis

``````

In [41]:

def metropolis( desiredPDF, initValue, computableRVS, skipIterations = iterations ):
random_variable = initValue
random_variableDensityValue = desiredPDF( random_variable )

for i in xrange( skipIterations ):
candidate = computableRVS( random_variable )
candidateDensityValue = desiredPDF( candidate )
"""
next candidate for sample, generated by computableRVS
"""

#acceptanceProb = min( 1, candidateDensityValue / random_variableDensityValue )
#logp is returnd by desiredPDF_bigram, so here is the change
acceptanceProb = min(0, candidateDensityValue - random_variableDensityValue )

if math.log(random.random()) < acceptanceProb:
random_variable = candidate
random_variableDensityValue = candidateDensityValue

#now when the procces is converged to desired distribution,
#return acceptable candidates
print "-----"
while True:
candidate = computableRVS( random_variable )
candidateDensityValue = desiredPDF( candidate )

#acceptanceProb = min( 1, candidateDensityValue / random_variableDensityValue )
# logp is returnd by desiredPDF_bigram, so here is the change
acceptanceProb = min( 0, candidateDensityValue - random_variableDensityValue )

if math.log(random.random()) < acceptanceProb:
random_variable = candidate
random_variableDensityValue = candidateDensityValue
yield random_variable, random_variableDensityValue

``````

permutation generator and computablervs

``````

In [43]:

def uniform( n ):

#initialize permutation with identical
permutation = [ i for i in xrange( n ) ]

#swap ith object with random onject from i to n - 1 enclusively
for i in xrange( n ):
j = random.randint( i, n - 1 )
permutation[ i ], permutation[ j ] = permutation[ j ], permutation[ i ]

return permutation

def applyTransposition( basePermutation ):

n = len( basePermutation )

permutation = copy( basePermutation )

#apply n random transpositions (including identical) to base permutation
#     for i in xrange( n ):
k, l = random.randint( 0, n - 1 ), random.randint( 0, n - 1 )
permutation[ k ], permutation[ l ] = permutation[ l ], permutation[ k ]

return  permutation

``````
``````

In [ ]:

decrypt

``````
``````

In [45]:

def decrypt(permutation, encrypted):
decrypted = []
for i in encrypted:
decrypted.append(chr(permutation[ord(i)-97]+97))
return ''.join(decrypted)

``````

Metropolis

various number of iterations

``````

In [46]:

#TEST TEXT
fname = 'main/oliver_twist.txt'
encrypted, p = crypt(original)
#TRAIN TEXT
counts = get_unicount(train_text)
stats = get_bigram_stats_dic(train_text)

print 'encrypting permutation: ', p
bp = np.zeros(26, dtype=int)
for i in p:
bp[p[i]] = i
q = get_desiredPDF_bigram(bp)
print 'inverse to encrypting permutation: ', bp
print 'its likelihood: ', q
ra = uniform(26)
q = get_desiredPDF_bigram(ra)
print 'likelihood of random permutation: ', q

``````
``````

[20, 18, 25, 0, 3, 14, 21, 9, 13, 24, 12, 8, 10, 19, 2, 11, 6, 1, 22, 17, 4, 15, 23, 7, 5, 16]
[ 3 17 14  4 20 24 16 23 11  7 12 15 10  8  5 21 25 19  1 13  0  6 18 22  9
2]
-56398.5046942
-106838.163693

``````

250,500,1000,2000 cold iterations, best of 500

``````

In [82]:

import time
iterations = [250,500,1000,2000]
qs = np.zeros(16)
for k in xrange(4):
j=0
init_p = uniform(26)
for it in iterations:
st = time.time()
computableGen = lambda t: applyTransposition(t)
metropolisgenerator = \
metropolis(get_desiredPDF_bigram, init_p, computableGen, it)
x = []
y = []
for i in xrange( 500 ):
a,b = metropolisgenerator.next()
x.append(a)
y.append(b)

et =  time.time() - st
print 'cold iterations: ', it
print 'metropolis time: ', et
best = np.argmax(y)
bestx = x[best]
print 'best density among ', 500, ' last iterations: ', y[best]
print 'corresponding permutation: ', bestx
decrypted = decrypt(bestx, encrypted)
qs[4*k+j] = quality(decrypted, original)
print 'quality: ', qs[4*k+j]
j+=1
plt.plot(iterations, qs[:4], iterations, qs[4:8], iterations, qs[8:12], iterations, qs[12:16])

``````
``````

-----
cold iterations:  250
metropolis time:  39.5499999523
best density among  500  last iterations:  -71665.7306791
corresponding permutation:  [18, 14, 17, 19, 12, 10, 16, 25, 24, 13, 20, 5, 21, 7, 15, 2, 9, 0, 1, 4, 11, 22, 8, 6, 23, 3]
quality:  0.021148989899
-----
cold iterations:  500
metropolis time:  56.0929999352
best density among  500  last iterations:  -69884.1866772
corresponding permutation:  [12, 3, 14, 17, 13, 15, 25, 23, 5, 4, 6, 24, 21, 8, 22, 2, 16, 19, 20, 18, 0, 10, 11, 7, 9, 1]
quality:  0.320797258297
-----
cold iterations:  1000
metropolis time:  82.0660002232
best density among  500  last iterations:  -59755.084299
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 22, 15, 10, 8, 5, 21, 25, 18, 1, 13, 0, 6, 12, 19, 9, 2]
quality:  0.806006493506
-----
cold iterations:  2000
metropolis time:  132.069000006
best density among  500  last iterations:  -56398.5046942
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 8, 5, 21, 25, 19, 1, 13, 0, 6, 18, 22, 9, 2]
quality:  1.0
-----
cold iterations:  250
metropolis time:  38.4229998589
best density among  500  last iterations:  -69006.8175832
corresponding permutation:  [8, 17, 19, 14, 20, 5, 16, 25, 11, 7, 1, 6, 21, 0, 12, 15, 9, 18, 24, 3, 4, 10, 13, 22, 23, 2]
quality:  0.243912337662
-----
cold iterations:  500
metropolis time:  52.1320002079
best density among  500  last iterations:  -69254.7993549
corresponding permutation:  [22, 17, 14, 4, 20, 7, 9, 23, 19, 3, 12, 15, 1, 8, 10, 21, 25, 13, 2, 18, 24, 5, 0, 11, 16, 6]
quality:  0.425009018759
-----
cold iterations:  1000
metropolis time:  77.6610000134
best density among  500  last iterations:  -70974.3067524
corresponding permutation:  [13, 8, 19, 18, 7, 12, 25, 23, 20, 0, 5, 2, 21, 11, 1, 10, 16, 3, 6, 14, 17, 22, 4, 15, 9, 24]
quality:  0.00563672438672
-----
cold iterations:  2000
metropolis time:  126.657999992
best density among  500  last iterations:  -56398.5046942
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 8, 5, 21, 25, 19, 1, 13, 0, 6, 18, 22, 9, 2]
quality:  1.0
-----
cold iterations:  250
metropolis time:  38.7739999294
best density among  500  last iterations:  -65255.5584126
corresponding permutation:  [19, 11, 14, 4, 24, 20, 16, 23, 17, 22, 7, 15, 10, 8, 5, 21, 9, 18, 1, 13, 0, 6, 3, 12, 25, 2]
quality:  0.550955988456
-----
cold iterations:  500
metropolis time:  51.6640000343
best density among  500  last iterations:  -58318.9504693
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 8, 18, 21, 25, 19, 1, 13, 0, 6, 5, 22, 9, 2]
quality:  0.92306998557
-----
cold iterations:  1000
metropolis time:  84.2070000172
best density among  500  last iterations:  -61092.6927378
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 18, 5, 21, 25, 19, 1, 13, 0, 6, 8, 22, 9, 2]
quality:  0.870716089466
-----
cold iterations:  2000
metropolis time:  129.15500021
best density among  500  last iterations:  -56398.5046942
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 8, 5, 21, 25, 19, 1, 13, 0, 6, 18, 22, 9, 2]
quality:  1.0
-----
cold iterations:  250
metropolis time:  38.5729999542
best density among  500  last iterations:  -63944.25672
corresponding permutation:  [3, 17, 4, 0, 14, 24, 9, 23, 11, 7, 12, 15, 10, 8, 5, 21, 16, 19, 1, 13, 20, 6, 18, 2, 25, 22]
quality:  0.64172979798
-----
cold iterations:  500
metropolis time:  50.746999979
best density among  500  last iterations:  -59095.4862605
corresponding permutation:  [3, 17, 14, 4, 20, 24, 9, 23, 22, 7, 12, 15, 10, 8, 5, 21, 16, 19, 11, 13, 0, 6, 18, 1, 25, 2]
quality:  0.912157287157
-----
cold iterations:  1000
metropolis time:  78.1519999504
best density among  500  last iterations:  -56398.5046942
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 8, 5, 21, 25, 19, 1, 13, 0, 6, 18, 22, 9, 2]
quality:  1.0
-----
cold iterations:  2000
metropolis time:  129.299000025
best density among  500  last iterations:  -59398.0800175
corresponding permutation:  [3, 17, 0, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 14, 5, 21, 25, 19, 1, 13, 8, 6, 18, 22, 9, 2]
quality:  0.77141955267

Out[82]:

[<matplotlib.lines.Line2D at 0xc36bcc0>,
<matplotlib.lines.Line2D at 0xc36bef0>,
<matplotlib.lines.Line2D at 0xc375518>,
<matplotlib.lines.Line2D at 0xc375a90>]

``````
``````

In [ ]:

plt.savefig('Bigram,noWordDelimiter,metrop,varyColdIters')

``````

0 cold iterations perfomance (varying last iters):

``````

In [83]:

iterations = [250,500,1000,2000]
qs = np.zeros(16)
for k in xrange(4):
j=0
init_p = uniform(26)
for it in iterations:
st = time.time()
computableGen = lambda t: applyedTranspostions(t)
metropolisgenerator = \
metropolis(get_desiredPDF_bigram, init_p, computableGen, 0)
x = []
y = []
for i in xrange( it ):
a,b = metropolisgenerator.next()
x.append(a)
y.append(b)

et =  time.time() - st
print 'cold iterations: ',
print 'metropolis time: ', et
best = np.argmax(y)
bestx = x[best]
print 'best density among ', it, ' last iterations: ', y[best]
print 'corresponding permutation: ', bestx
decrypted = decrypt(bestx, encrypted)
qs[4*k+j] = quality(decrypted, original)
print 'quality: ', qs[4*k+j]
j+=1
plt.plot(iterations, qs[:4], iterations, qs[4:8], iterations, qs[8:12], iterations, qs[12:16])

``````
``````

-----
cold iterations:  metropolis time:  12.8880000114
best density among  250  last iterations:  -70648.2551412
corresponding permutation:  [13, 3, 19, 4, 8, 15, 9, 6, 12, 22, 1, 7, 23, 0, 20, 2, 16, 18, 10, 17, 14, 5, 11, 24, 25, 21]
quality:  0.123917748918
-----
cold iterations:  metropolis time:  26.8050000668
best density among  500  last iterations:  -65316.9725865
corresponding permutation:  [3, 13, 8, 4, 11, 15, 9, 2, 12, 7, 5, 10, 25, 14, 6, 21, 16, 19, 20, 17, 0, 24, 18, 22, 23, 1]
quality:  0.492153679654
-----
cold iterations:  metropolis time:  51.4409999847
best density among  1000  last iterations:  -63746.6548882
corresponding permutation:  [3, 17, 20, 4, 0, 5, 9, 25, 11, 7, 10, 1, 24, 14, 15, 21, 23, 19, 22, 13, 8, 6, 18, 2, 16, 12]
quality:  0.577245670996
-----
cold iterations:  metropolis time:  106.414999962
best density among  2000  last iterations:  -56398.5046942
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 8, 5, 21, 25, 19, 1, 13, 0, 6, 18, 22, 9, 2]
quality:  1.0
-----
cold iterations:  metropolis time:  13.2339999676
best density among  250  last iterations:  -70030.0127475
corresponding permutation:  [12, 13, 0, 8, 20, 24, 10, 23, 3, 19, 22, 2, 25, 4, 6, 15, 16, 5, 11, 17, 14, 21, 18, 1, 9, 7]
quality:  0.106466450216
-----
cold iterations:  metropolis time:  26.1779999733
best density among  500  last iterations:  -70034.964249
corresponding permutation:  [5, 11, 0, 14, 20, 24, 16, 23, 7, 18, 15, 2, 10, 4, 6, 9, 25, 8, 22, 3, 19, 1, 17, 13, 21, 12]
quality:  0.0572691197691
-----
cold iterations:  metropolis time:  51.9179999828
best density among  1000  last iterations:  -61853.5329716
corresponding permutation:  [22, 13, 14, 4, 20, 6, 16, 23, 11, 7, 12, 24, 10, 8, 5, 21, 25, 19, 1, 18, 0, 15, 3, 17, 9, 2]
quality:  0.686327561328
-----
cold iterations:  metropolis time:  108.29700017
best density among  2000  last iterations:  -56398.5046942
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 8, 5, 21, 25, 19, 1, 13, 0, 6, 18, 22, 9, 2]
quality:  1.0
-----
cold iterations:  metropolis time:  12.7339999676
best density among  250  last iterations:  -72731.4355596
corresponding permutation:  [0, 18, 8, 4, 13, 12, 25, 23, 1, 5, 6, 21, 10, 20, 7, 2, 16, 14, 24, 19, 17, 22, 3, 15, 9, 11]
quality:  0.136228354978
-----
cold iterations:  metropolis time:  25.6190001965
best density among  500  last iterations:  -59911.8196339
corresponding permutation:  [3, 12, 14, 4, 20, 24, 16, 25, 11, 7, 17, 15, 10, 8, 5, 21, 23, 18, 1, 13, 0, 6, 19, 22, 9, 2]
quality:  0.762490981241
-----
cold iterations:  metropolis time:  51.6050000191
best density among  1000  last iterations:  -58286.4085992
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 22, 10, 8, 5, 21, 25, 19, 1, 13, 0, 2, 18, 15, 9, 6]
quality:  0.910984848485
-----
cold iterations:  metropolis time:  104.585999966
best density among  2000  last iterations:  -56398.5046942
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 8, 5, 21, 25, 19, 1, 13, 0, 6, 18, 22, 9, 2]
quality:  1.0
-----
cold iterations:  metropolis time:  12.986000061
best density among  250  last iterations:  -73184.375206
corresponding permutation:  [20, 0, 18, 19, 22, 1, 25, 21, 14, 8, 5, 10, 23, 13, 15, 2, 16, 3, 24, 4, 17, 12, 11, 6, 9, 7]
quality:  0.000631313131313
-----
cold iterations:  metropolis time:  25.6989998817
best density among  500  last iterations:  -60800.1275249
corresponding permutation:  [3, 17, 14, 4, 20, 15, 16, 23, 11, 7, 21, 5, 10, 0, 22, 1, 9, 19, 12, 13, 8, 6, 18, 24, 25, 2]
quality:  0.703373015873
-----
cold iterations:  metropolis time:  53.864000082
best density among  1000  last iterations:  -70759.4506564
corresponding permutation:  [3, 14, 18, 13, 7, 12, 25, 23, 20, 8, 24, 1, 21, 17, 2, 22, 9, 19, 10, 4, 11, 5, 0, 15, 16, 6]
quality:  0.135326479076
-----
cold iterations:  metropolis time:  108.180000067
best density among  2000  last iterations:  -56398.5046942
corresponding permutation:  [3, 17, 14, 4, 20, 24, 16, 23, 11, 7, 12, 15, 10, 8, 5, 21, 25, 19, 1, 13, 0, 6, 18, 22, 9, 2]
quality:  1.0

Out[83]:

[<matplotlib.lines.Line2D at 0xc42af28>,
<matplotlib.lines.Line2D at 0xc43a198>,
<matplotlib.lines.Line2D at 0xc43a780>,
<matplotlib.lines.Line2D at 0xc43acf8>]

``````
``````

In [84]:

plt.savefig('Bigram,noWordDelimiter,metrop,varyIters')

``````
``````

<matplotlib.figure.Figure at 0xac4ff28>

``````
``````

In [2]:

import numpy as np
import matplotlib.pyplot as plt

% matplotlib inline

iterations = [250, 500, 1000, 2000]
stat_raw = np.array([
12.8880000114, 0.123917748918,
26.8050000668, 0.492153679654,
51.4409999847, 0.577245670996,
106.414999962, 1.0,

13.2339999676, 0.106466450216,
26.1779999733, 0.0572691197691,
51.9179999828, 0.686327561328,
108.29700017, 1.0,

12.7339999676, 0.136228354978,
25.6190001965, 0.762490981241,
51.6050000191, 0.910984848485,
104.585999966, 1.0,

12.986000061, 0.000631313131313,
25.6989998817, 0.703373015873,
53.864000082, 0.135326479076,
108.180000067, 1.0]).reshape((16, 2))
stat = np.array([stat_raw[::4, 1], stat_raw[1::4, 1], stat_raw[2::4, 1], stat_raw[3::4, 1]])
means = np.mean(stat, 1)
stds = np.std(stat, 1)
print(means)
print(stds)
plt.title('Dependence quality on cold iterations')
plt.xlabel('iterations')
plt.ylabel('quolity')
plt.plot(iterations, means - stds, 'r:')
plt.plot(iterations, means + stds, 'r:')
plt.plot(iterations, means, 'b-')

``````
``````

[ 0.09181097  0.5038217   0.57747114  1.        ]
[ 0.05369419  0.27671098  0.28221139  0.        ]

``````
``````

In [ ]:

``````