In [ ]:
#P1 Encryption/Decryption/Frequency Analysis

#English frequency data:
#http://jnicholl.org/Cryptanalysis/Data/EnglishData.php

class Cgram:
    def __init__(self,mono=4,di=3,tri=2,wlist=1,tol=10):
        self.Weights={'Mono':mono,'Di':di,'Tri':tri,'List':wlist}
        Monograms=self.loadgram('monogram.dat')
        Digram=self.loadgram('digram.dat')
        Trigram=self.loadgram('trigram.dat')
        Polygram={}#Not implemented yet
        self.WList=self.loadWList('pword.txt')
        self.grams=[Monograms,Digram,Trigram,Polygram]
        self.cipstr=""
        self.plnstr=""
        self.letters='abcdefghijklmnopqrstuvwxyz'
        self.tolerance=float(tol)/100.00
        self.decodex=dd(dd)
    def loadWList(self,path):
        temp=dd()
        with open(path) as f:
            for i in f:
                #res=i.split()
                #temp[res[0]]=res[1:]
                j=i.find(' ')
                temp[i[:j]]=i[j:].split()
        return temp
    def loadgram(self,path):
        temp=dd(int)
        with open(path) as f:
            for i in f:
                j=i.find(' ')
                temp[i[:j].lower()]=-int(i[j:])
                #res=i.split()
                #temp[res[-1]]=res[:-1]
        return temp
    def search(self,s):
        '''<<Meta-function>> Do a subsearch for each type of implemented gram'''
        self.rep=[]#dd(int) #rep = "Report"
        for i in range(3):
            self.rep+=[self.subsearch(s,i+1)]
        self.cipstr=s
        self.plnstr=s.upper()
        #print self.rep
    def mapper(self):
        #Throw the found grams into a maxheap and pair them with their correponding highest
        #freq contender for each size. for each char in each gram, raise the confidence of
        #a mapping by one
        #e.g. 'ABC' matches 'CRE' and 'AB' matches 'TE': {'A':[['C',1]['T',1]],'B':[[R,1],[E,1]],etc...}
        #When mapping is complete, look for highest confidences and create a decoder dict to run the
        #ciphertext through
        decodex=dd(dd) #Dictionary of dictionaries of char:confidence parings
        dictkeys=['Mono','Di','Tri']
        #NEED to sort everything largest to smallest frequency
        for i in range(3): #i+1 is the generated {mono,di,tri}graph set
            j2=self.dictToHeap(self.grams[i])#;print "\nGiven {0}-Gram: ".format(i+1),j2
            j=self.dictToHeap(self.rep[i])#;print "\nReported {0}-Gram: ".format(i+1),j
            while (j and j2):
                k2=heappop(j)[1]
                k=heappop(j2)[1]
                for l in range(i):
                    try:
                        decodex[k[l]][k2[l]]+=self.Weights[dictkeys[i]]
                    except:
                        decodex[k[l]][k2[l]]=self.Weights[dictkeys[i]]
                    try:
                        decodex[k[l]]['Total']+=self.Weights[dictkeys[i]]
                    except:
                        decodex[k[l]]['Total']=self.Weights[dictkeys[i]]
        #By the end of this function, there should be character mappings based on frequency comparisons
        self.decodex=decodex
    def mapper2(self):
        s=self.cipstr
        t1=re.sub(r'[\W_]+',' ',s.lower()).split() #Extract all words
        for s in t1: #For each word in cipher
            try:
                for itera in self.WList[self.pata(s)]: #For each possibility for s pattern
                    j2=itera #Potential Word
                    j=s #Ciphered word
                    for k in range(len(j)): 
                        #If the likelihood of the a letter corresponding based on frequencies is high enough
                        #add to its likelihood for each possible matching word
                        if float(self.decodex[j[k]][j2[k]])/float(self.decodex[j[k]]['Total'])>self.tolerance:
                            try:
                                self.decodex[j[k]][j2[k]]+=self.Weights['List']
                            except:
                                self.decodex[j[k]][j2[k]]=self.Weights['List']
                            try:
                                self.decodex[j[k]]['Total']+=self.Weights['List']
                            except:
                                self.decodex[j[k]]['Total']=self.Weights['List']
            except:
                pass
        return self.decodex
    def makeMap(self):
        #for i in Temp.keys():
        #print i
        #for j in Temp[i].keys():
        #    print '{0}={1}: {2}'.format(i,j,Temp[i][j])
        pass
    def dictToHeap(self,dic):
        temp=[]
        for i in dic.keys():
            heappush(temp,[dic[i],i])
        return temp
    def subsearch(self,s,n):
        '''Enumerate all n-grams within s using a dynamic dictionary'''
        '''Something tricky here to use the heapq module as a maxheap:
        instead of adding 1 for each occurence of a gram, subtracting 1'''
        rep=dd(int)
        #t1=s.lower().split(' ')
        t1=re.sub(r'[\W_]+',' ',s.lower()).split() #Extract all words
        for s in t1: #For each word
            for iter in range(n): 
                for i in [(s[iter+i:iter+i+n]) for i in xrange(0, len(s), n) ]:
                    if len(i)==n: rep[i]-=1
        return rep
    def solve(self,s):
        self.search(s)
        self.mapper()
        self.mapper2()
        temp=[]
        for i in self.decodex.keys():
            for j in self.decodex[i]:
                if j!='Total': temp+=[[-self.decodex[i][j],i,j]]
        temp.sort()
        mapp=dd()
        buff=self.letters
        for i in temp:
            if (i[1] in buff):
                mapp[i[1]]=i[2]
                buff.replace(i[1],'')
        #print temp
        print "Frequency analysis maps the following: ",mapp
        self.failsafe()
    def pata(self,inS): #Pattern Analyzer
        '''Return a string yielding the character pattern of inS
        e.g. "racecar" would yield "ABCDCBA"'''
        temp=inS.lower()
        count=0
        for j in range(len(temp)):
            i=temp[j]
            if (ord(i)>=97 and ord(i)<=122):
                temp=temp.replace(i,chr(65+count))
                count+=1
        return temp
    def failsafe(self):
        '''If at first you don't succeed...'''
        maybes=set()
        for i in [1,3,5,7,9,11,13,17,19,21,23,25]:
            for j in range(25):
                temp=MAS(i,j).dec(ct)
                t1=re.sub(r'[\W_]+',' ',temp.lower()).split()
                good=0
                if len(t1)>10:
                    tries=10
                else:
                    tries=len(t1)
                for k in range(tries):
                    #if (i==9 and j==17): print t1[k]
                    try:
                        for l in self.WList[self.pata(t1[k])]:
                            if t1[k]==l:
                                good+=1
                    except:
                        pass
                if good>=5: maybes.add(temp)
        print "Possible Decryptions: ",maybes