#### read, statistics, quality, crypt, decrypt, Metropolis, likelihood functions:

``````

In :

import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline
import random
from numpy.random import rand
from copy import copy
from __future__ import division
import time

with open(filename, 'r') as f:
return X

with open(filename) as f:
X = X[:wordsnumber]
X = ''.join(X)
X = X.replace('\n', '')
return X

with open(filename) as f:
X = X.replace('\n', '')
return X

def get_unicount(text):
length = len(text)
counts = np.zeros(26)
for i in xrange(length):
c = ord(text[i])
counts[c-97]+=1
#97-122
return counts

def get_bigram_stats_dic(text):
length = len(text)
dic = {}
for i in 'abcdefghijklmnopqrstuvwxyz':
for j in 'abcdefghijklmnopqrstuvwxyz':
dic[(i,j)]=0

for i in xrange(length-1):
if (text[i], text[i+1]) in dic:
dic[(text[i], text[i+1])] += 1

for k,v in dic.items():
dic[k] = v/(counts[ord(k)-97])
return dic

def quality(decrypted, original):
l = len(decrypted)
zipped = zip(decrypted, original)
return sum(1.0 for x,y in zipped if x == y)/l

def crypt(text):
p = range(26)
random.shuffle(p)
output=''
for ch in text:
try:
x = ord(ch) - ord('a')
output+=(chr(p[x] + ord('a')))
except:
pass
return output, p

def get_desiredPDF_bigram(permutation):
logp = 0
for i in xrange(len(encrypted)-1):
pr = stats[chr(permutation[ord(encrypted[i])-97]+97),
chr(permutation[ord(encrypted[i+1])-97]+97)]
if pr>0:
logp += math.log(pr)
else:
logp += -9 #penalty for non existant pairs
return logp

def metropolis( desiredPDF, initValue, computableRVS, skipIterations = 1000):
random_variable = initValue
random_variableDensityValue = desiredPDF( random_variable )

for i in xrange( skipIterations ):
candidate = computableRVS( random_variable )
candidateDensityValue = desiredPDF( candidate )
acceptanceProb = min(0, candidateDensityValue - random_variableDensityValue )
if math.log(random.random()) < acceptanceProb:
random_variable = candidate
random_variableDensityValue = candidateDensityValue

while True:
candidate = computableRVS( random_variable )
candidateDensityValue = desiredPDF( candidate )

acceptanceProb = min( 0, candidateDensityValue - random_variableDensityValue )

if math.log(random.random()) < acceptanceProb:
random_variable = candidate
random_variableDensityValue = candidateDensityValue
yield random_variable, random_variableDensityValue

def uniform( n ):
#initialize permutation with identical
permutation = [ i for i in xrange( n ) ]

#swap ith object with random onject from i to n - 1 enclusively
for i in xrange( n ):
j = random.randint( i, n - 1 )
permutation[ i ], permutation[ j ] = permutation[ j ], permutation[ i ]

return permutation

def applyTransposition( basePermutation ):
n = len( basePermutation )

permutation = copy( basePermutation )

#apply n random transpositions (including identical) to base permutation
#     for i in xrange( n ):
k, l = random.randint( 0, n - 1 ), random.randint( 0, n - 1 )
permutation[ k ], permutation[ l ] = permutation[ l ], permutation[ k ]

return  permutation

def decrypt(permutation, encrypted):
decrypted = []
for i in encrypted:
decrypted.append(chr(permutation[ord(i)-97]+97))
return ''.join(decrypted)

``````

crypt with given permutation:

``````

In :

def crypt_given(permutation, text):
output=''
#     p.append(26)
for ch in text:
try:
x = ord(ch) - ord('a')
output+=(chr(permutation[x] + ord('a')))
except:
pass
return output

``````

fixed training text (War and Peace):

``````

In :

counts = get_unicount(train_text)
stats = get_bigram_stats_dic(train_text)

``````

vary size of train text (used War and Peace, total 3Mb)

``````

In :

sizes =  [1000,2000,4000,8000,16000,32000,64000,128000]
qs = list()
fixed_permutation = uniform(26)
for s in sizes:
print 'size of encrypted text: ',s
encrypted = crypt_given(fixed_permutation, original)
init_p = uniform(26)
#Metropolis here
st = time.time()
computableGen = lambda t: applyTransposition(t)
metropolisgenerator = \
metropolis(get_desiredPDF_bigram, init_p, computableGen, 1500)

y = float('-inf')
for i in xrange( 1000 ):
cand = metropolisgenerator.next()
if (cand > y):
y = cand
x = cand

et =  time.time() - st
print 'metropolis time: ', et

print 'best density among 1000 last iterations: ', y
print 'corresponding permutation: ', x

decrypted = decrypt(x, encrypted)
qs.append(quality(decrypted, original))

plt.plot(sizes[:len(qs)], qs)

``````
``````

size of encrypted text:  1000
metropolis time:  35.6748039722
best density among 1000 last iterations:  -12634.9801002
corresponding permutation:  [22, 24, 7, 10, 5, 13, 2, 17, 1, 19, 16, 8, 21, 23, 20, 25, 0, 4, 18, 11, 14, 9, 12, 3, 6, 15]
size of encrypted text:  2000
metropolis time:  70.1986711025
best density among 1000 last iterations:  -23482.4403826
corresponding permutation:  [22, 24, 7, 5, 23, 13, 2, 17, 1, 19, 16, 8, 21, 25, 20, 10, 0, 4, 18, 11, 14, 9, 12, 3, 6, 15]
size of encrypted text:  4000
metropolis time:  132.329380989
best density among 1000 last iterations:  -55905.3265833
corresponding permutation:  [6, 10, 13, 5, 23, 7, 22, 17, 1, 8, 9, 3, 12, 16, 0, 21, 19, 4, 18, 11, 14, 25, 15, 24, 20, 2]
size of encrypted text:  8000
metropolis time:  261.615154028
best density among 1000 last iterations:  -89693.449803
corresponding permutation:  [22, 24, 7, 5, 23, 13, 2, 17, 1, 19, 16, 8, 21, 25, 20, 10, 0, 4, 18, 11, 14, 9, 12, 3, 6, 15]
size of encrypted text:  16000
metropolis time:  558.243676901
best density among 1000 last iterations:  -177140.316335
corresponding permutation:  [22, 24, 7, 5, 23, 13, 2, 17, 1, 19, 16, 8, 21, 25, 20, 10, 0, 4, 18, 11, 14, 9, 12, 3, 6, 15]
size of encrypted text:  32000
metropolis time:  1014.72146583
best density among 1000 last iterations:  -349751.654302
corresponding permutation:  [22, 24, 7, 5, 23, 13, 2, 17, 1, 19, 16, 8, 21, 25, 20, 10, 0, 4, 18, 11, 14, 9, 12, 3, 6, 15]
size of encrypted text:  64000
metropolis time:  2261.47678995
best density among 1000 last iterations:  -699311.590259
corresponding permutation:  [22, 24, 7, 5, 23, 13, 2, 17, 1, 19, 16, 8, 10, 25, 20, 21, 0, 4, 18, 11, 14, 9, 12, 3, 6, 15]
size of encrypted text:  128000
metropolis time:  4571.27480602
best density among 1000 last iterations:  -1381820.9484
corresponding permutation:  [22, 24, 7, 5, 23, 13, 2, 17, 1, 19, 16, 8, 21, 25, 20, 10, 0, 4, 18, 11, 14, 9, 12, 3, 6, 15]

Out:

``````

compare war and peace and shakespeare as training sets

``````

In [ ]:

``````