``````

In [1]:

import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline
import random
from numpy.random import rand
from copy import copy
from __future__ import division
with open(filename) as f:
X = X[:wordsnumber]
X = ''.join(X)
X = X.replace('\n', '')
return X

with open(filename) as f:
X = X.replace('\n', '')
return X

def chop_text_to_size(text, size):
return text[:1024*1024*size]

with open(filename) as f:
X = X.replace('\n', '')
return X
def get_unicount(text):
length = len(text)
counts = np.zeros(26)
for i in xrange(length):
c = ord(text[i])
counts[c-97]+=1
#97-122
return counts
def get_bigram_stats_dic(text):
length = len(text)
dic = {}
for i in 'abcdefghijklmnopqrstuvwxyz':
for j in 'abcdefghijklmnopqrstuvwxyz':
dic[(i,j)]=0

for i in xrange(length-1):
if (text[i], text[i+1]) in dic:
dic[(text[i], text[i+1])] += 1

for k,v in dic.items():
dic[k] = v/(counts[ord(k[0])-97])
return dic
def quality(decrypted, original):
l = len(decrypted)
zipped = zip(decrypted, original)
return sum(1.0 for x,y in zipped if x == y)/l

def crypt(text):
p = range(26)
random.shuffle(p)
output=''
for ch in text:
try:
x = ord(ch) - ord('a')
output+=(chr(p[x] + ord('a')))
except:
pass
return output, p
def get_desiredPDF_bigram(permutation):
logp = 0
for i in xrange(len(encrypted)-1):
pr = stats[chr(permutation[ord(encrypted[i])-97]+97),
chr(permutation[ord(encrypted[i+1])-97]+97)]
if pr>0:
logp += math.log(pr)
else:
logp += -9 #penalty for non existant pairs
return logp

def uniform( n ):

#initialize permutation with identical
permutation = [ i for i in xrange( n ) ]

#swap ith object with random onject from i to n - 1 enclusively
for i in xrange( n ):
j = random.randint( i, n - 1 )
permutation[ i ], permutation[ j ] = permutation[ j ], permutation[ i ]

return permutation

def applyedTranspostions( basePermutation ):

n = len( basePermutation )

permutation = copy( basePermutation )
#apply n random transpositions (including identical) to base permutation
#     for i in xrange( n ):
k, l = random.randint( 0, n - 1 ), random.randint( 0, n - 1 )
permutation[ k ], permutation[ l ] = permutation[ l ], permutation[ k ]

return  permutation

``````

density maximization

``````

In [2]:

def densityMaximization( desiredPDF, initValue, computableRVS, skipIterations = 200 ):
"""
This function return a generator, which generates random variables
from some space S by trying to maximize givven density.
The algorithm is a modification of Metropolis-Hastings.
It rejects all objects, which decrease density.

Args:
desiredPDF (func) : PDF of desired distribution p( T ), where T from S
initValue : an object from S to initialize the starting point
of iterative proccess
computableRVS (func) : a generator of random value from space S
with given parameter T, which is also from S
skipIterations (int) : number of iterations to skip
(skipping more iterations leads to better accuracy?
but greater time consuming)

Returns: generator, which produce some values from S,
where each next value has no less density, and their denisity
"""

random_variable = initValue
random_variableDensityValue = desiredPDF( random_variable )
"""
A state of MCMC
"""

#ignore first iterations to let the iterative proccess to enter
#the high density regions
for i in xrange( skipIterations ):
candidate = computableRVS( random_variable )
candidateDensityValue = desiredPDF( candidate )
"""
next candidate for sample, generated by computableRVS
"""

if random_variableDensityValue < candidateDensityValue:
random_variable = candidate
random_variableDensityValue = candidateDensityValue

#now when the procces is in high density regions,
#return acceptable candidates
while True:
candidate = computableRVS( random_variable )
candidateDensityValue = desiredPDF( candidate )
"""
next candidate for sample, generated by computableRVS
"""

if random_variableDensityValue < candidateDensityValue:
random_variable = candidate
random_variableDensityValue = candidateDensityValue
yield random_variable, random_variableDensityValue

``````

decrypt

``````

In [3]:

def decrypt(permutation, encrypted):
decrypted = []
for i in encrypted:
decrypted.append(chr(permutation[ord(i)-97]+97))
return ''.join(decrypted)

``````

# Density maximization

### various number of iterations

``````

In [4]:

#TEST TEXT
fname = 'main/oliver_twist.txt'
#3 first symbols in oliver twist are unsupported by encryption
encrypted, p = crypt(original)
#TRAIN TEXT
counts = get_unicount(train_text)
stats = get_bigram_stats_dic(train_text)
# print stats
print p
bp = np.zeros(26, dtype=int)
for i in p:
bp[p[i]] = i
q = get_desiredPDF_bigram(bp)
print 'inverse to permutation used in encryption ', bp
print 'its density ', q
ra = uniform(26)
q = get_desiredPDF_bigram(ra)
print 'random permutation density ', q

``````
``````

[14, 4, 25, 8, 5, 3, 19, 13, 16, 0, 2, 18, 17, 20, 1, 15, 10, 24, 6, 12, 11, 21, 23, 7, 9, 22]
inverse to permutation used in encryption  [ 9 14 10  5  1  4 18 23  3 24 16 20 19  7  0 15  8 12 11  6 13 21 25 22 17
2]
its density  -56398.5046942
random permutation density  -115386.123802

``````
``````

In [10]:

import time
iterations = [250,500,1000,2000]
qualities = np.zeros((5, 4))
qualities[1, :] = qs;
init_p = uniform(26)
for i in xrange(1, 5):
for j, it in enumerate(iterations):
st = time.time()
computableGen = lambda t: applyedTranspostions(t)
dmgenerator = \
densityMaximization(get_desiredPDF_bigram, init_p, computableGen, it)
for k in xrange( 500 ):
x,y = dmgenerator.next()

et =  time.time() - st
print 'cold iterations: ', it
print 'dm time: ', et
print 'best density among 500 last iterations: ', y
print 'corresponding permutation: ', x
decrypted = decrypt(x, encrypted)
qualities[i, j] = quality(decrypted, original)
print 'quality: ', qualities[i, j]

``````
``````

cold iterations:  250
dm time:  51.9987618923
best density among 500 last iterations:  -60753.2486294
corresponding permutation:  [9, 14, 1, 5, 2, 4, 18, 25, 3, 10, 16, 20, 19, 7, 8, 22, 0, 15, 11, 6, 13, 21, 23, 12, 17, 24]
quality:  0.700396825397
cold iterations:  500
dm time:  69.4579558372
best density among 500 last iterations:  -61170.1552957
corresponding permutation:  [9, 0, 10, 24, 1, 4, 11, 23, 3, 12, 16, 20, 19, 7, 8, 15, 14, 5, 18, 6, 13, 21, 25, 22, 17, 2]
quality:  0.609577922078
cold iterations:  1000
dm time:  102.202694178
best density among 500 last iterations:  -59311.9901532
corresponding permutation:  [9, 14, 10, 5, 1, 4, 12, 23, 3, 24, 16, 20, 18, 7, 0, 15, 8, 19, 11, 6, 13, 21, 25, 22, 17, 2]
quality:  0.830988455988
cold iterations:  2000
dm time:  172.465212107
best density among 500 last iterations:  -60330.7553388
corresponding permutation:  [9, 14, 21, 5, 1, 4, 18, 25, 13, 24, 16, 20, 19, 7, 0, 15, 8, 6, 11, 2, 3, 10, 23, 22, 17, 12]
quality:  0.793244949495
cold iterations:  250
dm time:  51.6526489258
best density among 500 last iterations:  -71316.8796703
corresponding permutation:  [9, 19, 21, 1, 20, 13, 18, 23, 6, 15, 25, 7, 4, 0, 11, 22, 17, 24, 3, 5, 8, 10, 16, 2, 14, 12]
quality:  0.0623196248196
cold iterations:  500
dm time:  84.8463029861
best density among 500 last iterations:  -61574.3818021
corresponding permutation:  [10, 14, 25, 1, 5, 4, 18, 23, 3, 24, 16, 20, 22, 7, 0, 15, 8, 12, 11, 6, 13, 21, 9, 19, 17, 2]
quality:  0.840818903319
cold iterations:  1000
dm time:  140.283475161
best density among 500 last iterations:  -56398.5046942
corresponding permutation:  [9, 14, 10, 5, 1, 4, 18, 23, 3, 24, 16, 20, 19, 7, 0, 15, 8, 12, 11, 6, 13, 21, 25, 22, 17, 2]
quality:  1.0
cold iterations:  2000
dm time:  210.2876091
best density among 500 last iterations:  -56398.5046942
corresponding permutation:  [9, 14, 10, 5, 1, 4, 18, 23, 3, 24, 16, 20, 19, 7, 0, 15, 8, 12, 11, 6, 13, 21, 25, 22, 17, 2]
quality:  1.0
cold iterations:  250
dm time:  52.2484049797
best density among 500 last iterations:  -70597.7159037
corresponding permutation:  [9, 11, 23, 5, 12, 19, 18, 21, 7, 20, 25, 24, 4, 0, 17, 15, 13, 6, 14, 1, 3, 10, 16, 22, 8, 2]
quality:  0.147952741703
cold iterations:  500
dm time:  67.2549221516
best density among 500 last iterations:  -57406.4370758
corresponding permutation:  [9, 14, 25, 5, 15, 4, 18, 10, 3, 24, 16, 20, 19, 7, 0, 1, 8, 12, 11, 6, 13, 21, 23, 22, 17, 2]
quality:  0.948277417027
cold iterations:  1000
dm time:  137.225626945
best density among 500 last iterations:  -56398.5046942
corresponding permutation:  [9, 14, 10, 5, 1, 4, 18, 23, 3, 24, 16, 20, 19, 7, 0, 15, 8, 12, 11, 6, 13, 21, 25, 22, 17, 2]
quality:  1.0
cold iterations:  2000
dm time:  233.10056591
best density among 500 last iterations:  -56398.5046942
corresponding permutation:  [9, 14, 10, 5, 1, 4, 18, 23, 3, 24, 16, 20, 19, 7, 0, 15, 8, 12, 11, 6, 13, 21, 25, 22, 17, 2]
quality:  1.0
cold iterations:  250
dm time:  51.3295900822
best density among 500 last iterations:  -64506.7427554
corresponding permutation:  [25, 14, 21, 5, 24, 4, 18, 23, 3, 17, 9, 8, 19, 7, 0, 10, 20, 15, 12, 2, 13, 1, 16, 22, 11, 6]
quality:  0.647366522367
cold iterations:  500
dm time:  68.3474080563
best density among 500 last iterations:  -64942.2738534
corresponding permutation:  [25, 14, 10, 5, 1, 0, 18, 23, 22, 24, 16, 20, 3, 7, 4, 15, 8, 12, 17, 6, 13, 21, 9, 19, 11, 2]
quality:  0.534902597403
cold iterations:  1000
dm time:  152.182391167
best density among 500 last iterations:  -57106.4562551
corresponding permutation:  [9, 14, 10, 5, 1, 4, 18, 23, 3, 24, 16, 20, 19, 7, 0, 15, 8, 12, 11, 2, 13, 21, 25, 22, 17, 6]
quality:  0.955943362193
cold iterations:  2000
dm time:  230.563726902
best density among 500 last iterations:  -59947.9104811
corresponding permutation:  [9, 14, 10, 5, 1, 4, 18, 23, 3, 24, 16, 20, 22, 7, 0, 15, 8, 12, 11, 6, 13, 21, 25, 19, 17, 2]
quality:  0.888437950938

``````
``````

In [17]:

means = np.mean(qualities, 0)
stds = np.std(qualities, 0)
print(means)
print(stds)
plt.title('Dependence quolity on cold iterations')
plt.xlabel('iterations')
plt.ylabel('quolity')
plt.plot(iterations, means - stds, 'r:')
plt.plot(iterations, means + stds, 'r:')
plt.plot(iterations, means, 'b-')

``````
``````

[ 0.31160714  0.58671537  0.75738636  0.73633658]
[ 0.29997261  0.3294656   0.38371839  0.3761919 ]

``````
``````

In [ ]:

``````