In [1]:
    
def reverseComplement(seq):
    complementDict = {'A':'T', 'T':'A', 'G':'C', 'C':'G'}
    seq = ''.join([complementDict[i] for i in seq])
    return seq[::-1]
import networkx as nx
class deburijnGraph():
    def __init__(self, readList = [], k = 4):
        
        self.readList = []
        self.k = 5
        self.g = nx.MultiDiGraph()
        for read in readList:
            for i in xrange(len(read) - k + 1):
                for kmer in (read[i:i+k], reverseComplement(read[i:i+k])):
                    head = kmer[:-1]
                    tail = kmer[1:]
                    self.g.add_node(head)
                    self.g.add_node(tail)
                    self.g.add_edge(head, tail, label=kmer)
    
In [14]:
    
fp = open('GASM.txt')
readList = map(lambda x:x.strip(), fp.readlines())
fp.close()
print readList
    
    
In [3]:
    
dG = deburijnGraph(readList)
    
In [4]:
    
A=nx.to_agraph(dG.g)
A.graph_attr.update(label='Debruijn graph', fontsize='20.0')
A.node_attr.update(color='#336699', fontcolor='black')
A.edge_attr.update(color='#99CC33', fontcolor='black')
A.layout('circo')
A.draw('deburijnGraph.png')
    
In [5]:
    
from IPython import display
display.Image('deburijnGraph.png')
    
    Out[5]:
In [10]:
    
def is_semieulerian(G):
    start, end = None, None
    if G.is_directed():
        for n in G.nodes_iter():
            if G.in_degree(n) +1 == G.out_degree(n):
                if start != None: return False
                else: start = n
            elif G.in_degree(n) == G.out_degree(n)+1:
                if end != None: return False
                else: end = n
            elif G.in_degree(n) != G.out_degree(n):
                return False
        g = G.copy()
        g.add_edge(end, start)
        if not nx.is_strongly_connected(g):return False
    return True
def semi2eulerian(G):
    start = None
    end = None
    if G.is_directed():
        for n in G.nodes_iter():
            if G.in_degree(n) +1 == G.out_degree(n):
                start = n
            elif G.in_degree(n) == G.out_degree(n)+1:
                end = n
            elif G.in_degree(n) != G.out_degree(n):
                return False
    G.add_edge(end, start)
    return start, end
def eulerPath(g):
    startNode = None
    path = []
    if nx.is_eulerian(g):
        cycle = list(nx.eulerian_circuit(g, startNode))
        path = cycle
    elif is_semieulerian(g):
        g = g.copy()
        startNode, endNode = semi2eulerian(g)
        #print startNode, endNode
        #if nx.is_eulerian(g): print g.edges()
        cycle =  list(nx.eulerian_circuit(g, startNode))
        i = cycle.index((endNode, startNode))
        path = cycle[i+1:]+cycle[:i+1]
    return path
    
In [23]:
    
contigs = []
for i, subG in enumerate(nx.weakly_connected_component_subgraphs(dG.g, copy=True)):
    path = eulerPath(subG)
    superStr = ''
    walk = ''
    for p in path:
        walk += p[0]+'->'
        superStr += p[0][0]
    superStr += path[-1][0][1:]
    print walk[:-2]
    print superStr
    contigs.append(superStr)
    
    
In [29]:
    
for contig in contigs:
    print '-'*25
    print '>'+contig
    for read in readList:
        rev_read = reverseComplement(read)
        if read in contig:
            i = contig.index(read)
            print '+'+' '*i+read
        elif rev_read in contig:
            i = contig.index(rev_read)
            print '-'+' '*i+rev_read
        else:
            print ' '*len(contig)+read
    
    
In [ ]:
    
    
In [ ]: