In [ ]:
#P1 Encryption/Decryption/Frequency Analysis
#English frequency data:
#http://jnicholl.org/Cryptanalysis/Data/EnglishData.php
class Cgram:
def __init__(self,mono=4,di=3,tri=2,wlist=1,tol=10):
self.Weights={'Mono':mono,'Di':di,'Tri':tri,'List':wlist}
Monograms=self.loadgram('monogram.dat')
Digram=self.loadgram('digram.dat')
Trigram=self.loadgram('trigram.dat')
Polygram={}#Not implemented yet
self.WList=self.loadWList('pword.txt')
self.grams=[Monograms,Digram,Trigram,Polygram]
self.cipstr=""
self.plnstr=""
self.letters='abcdefghijklmnopqrstuvwxyz'
self.tolerance=float(tol)/100.00
self.decodex=dd(dd)
def loadWList(self,path):
temp=dd()
with open(path) as f:
for i in f:
#res=i.split()
#temp[res[0]]=res[1:]
j=i.find(' ')
temp[i[:j]]=i[j:].split()
return temp
def loadgram(self,path):
temp=dd(int)
with open(path) as f:
for i in f:
j=i.find(' ')
temp[i[:j].lower()]=-int(i[j:])
#res=i.split()
#temp[res[-1]]=res[:-1]
return temp
def search(self,s):
'''<<Meta-function>> Do a subsearch for each type of implemented gram'''
self.rep=[]#dd(int) #rep = "Report"
for i in range(3):
self.rep+=[self.subsearch(s,i+1)]
self.cipstr=s
self.plnstr=s.upper()
#print self.rep
def mapper(self):
#Throw the found grams into a maxheap and pair them with their correponding highest
#freq contender for each size. for each char in each gram, raise the confidence of
#a mapping by one
#e.g. 'ABC' matches 'CRE' and 'AB' matches 'TE': {'A':[['C',1]['T',1]],'B':[[R,1],[E,1]],etc...}
#When mapping is complete, look for highest confidences and create a decoder dict to run the
#ciphertext through
decodex=dd(dd) #Dictionary of dictionaries of char:confidence parings
dictkeys=['Mono','Di','Tri']
#NEED to sort everything largest to smallest frequency
for i in range(3): #i+1 is the generated {mono,di,tri}graph set
j2=self.dictToHeap(self.grams[i])#;print "\nGiven {0}-Gram: ".format(i+1),j2
j=self.dictToHeap(self.rep[i])#;print "\nReported {0}-Gram: ".format(i+1),j
while (j and j2):
k2=heappop(j)[1]
k=heappop(j2)[1]
for l in range(i):
try:
decodex[k[l]][k2[l]]+=self.Weights[dictkeys[i]]
except:
decodex[k[l]][k2[l]]=self.Weights[dictkeys[i]]
try:
decodex[k[l]]['Total']+=self.Weights[dictkeys[i]]
except:
decodex[k[l]]['Total']=self.Weights[dictkeys[i]]
#By the end of this function, there should be character mappings based on frequency comparisons
self.decodex=decodex
def mapper2(self):
s=self.cipstr
t1=re.sub(r'[\W_]+',' ',s.lower()).split() #Extract all words
for s in t1: #For each word in cipher
try:
for itera in self.WList[self.pata(s)]: #For each possibility for s pattern
j2=itera #Potential Word
j=s #Ciphered word
for k in range(len(j)):
#If the likelihood of the a letter corresponding based on frequencies is high enough
#add to its likelihood for each possible matching word
if float(self.decodex[j[k]][j2[k]])/float(self.decodex[j[k]]['Total'])>self.tolerance:
try:
self.decodex[j[k]][j2[k]]+=self.Weights['List']
except:
self.decodex[j[k]][j2[k]]=self.Weights['List']
try:
self.decodex[j[k]]['Total']+=self.Weights['List']
except:
self.decodex[j[k]]['Total']=self.Weights['List']
except:
pass
return self.decodex
def makeMap(self):
#for i in Temp.keys():
#print i
#for j in Temp[i].keys():
# print '{0}={1}: {2}'.format(i,j,Temp[i][j])
pass
def dictToHeap(self,dic):
temp=[]
for i in dic.keys():
heappush(temp,[dic[i],i])
return temp
def subsearch(self,s,n):
'''Enumerate all n-grams within s using a dynamic dictionary'''
'''Something tricky here to use the heapq module as a maxheap:
instead of adding 1 for each occurence of a gram, subtracting 1'''
rep=dd(int)
#t1=s.lower().split(' ')
t1=re.sub(r'[\W_]+',' ',s.lower()).split() #Extract all words
for s in t1: #For each word
for iter in range(n):
for i in [(s[iter+i:iter+i+n]) for i in xrange(0, len(s), n) ]:
if len(i)==n: rep[i]-=1
return rep
def solve(self,s):
self.search(s)
self.mapper()
self.mapper2()
temp=[]
for i in self.decodex.keys():
for j in self.decodex[i]:
if j!='Total': temp+=[[-self.decodex[i][j],i,j]]
temp.sort()
mapp=dd()
buff=self.letters
for i in temp:
if (i[1] in buff):
mapp[i[1]]=i[2]
buff.replace(i[1],'')
#print temp
print "Frequency analysis maps the following: ",mapp
self.failsafe()
def pata(self,inS): #Pattern Analyzer
'''Return a string yielding the character pattern of inS
e.g. "racecar" would yield "ABCDCBA"'''
temp=inS.lower()
count=0
for j in range(len(temp)):
i=temp[j]
if (ord(i)>=97 and ord(i)<=122):
temp=temp.replace(i,chr(65+count))
count+=1
return temp
def failsafe(self):
'''If at first you don't succeed...'''
maybes=set()
for i in [1,3,5,7,9,11,13,17,19,21,23,25]:
for j in range(25):
temp=MAS(i,j).dec(ct)
t1=re.sub(r'[\W_]+',' ',temp.lower()).split()
good=0
if len(t1)>10:
tries=10
else:
tries=len(t1)
for k in range(tries):
#if (i==9 and j==17): print t1[k]
try:
for l in self.WList[self.pata(t1[k])]:
if t1[k]==l:
good+=1
except:
pass
if good>=5: maybes.add(temp)
print "Possible Decryptions: ",maybes