Experiments in response to SO question.


In [1]:
from skbio.alignment import global_pairwise_align_nucleotide

seq_1 = 'ATCGATCGATCG'
seq_2 = 'ATCGATATCGATCG'

print "Sequences: "
print "     %s" % seq_1
print "     %s" % seq_2


Sequences: 
     ATCGATCGATCG
     ATCGATATCGATCG

In [2]:
alignment = global_pairwise_align_nucleotide(seq_1, seq_2)
al_1, al_2 = [alignment.get_seq(_id).__str__() for _id in alignment.ids()]

print "    nw alignment using scikit:"
print "        %s" % al_1
print "        %s" % al_2
print


    nw alignment using scikit:
        ------ATCGATCGATCG
        ATCGATATCGATCG----

/Users/caporaso/Dropbox/code/scikit-bio/skbio/alignment/_pairwise.py:594: UserWarning: make_identity_substitution_matrix is deprecated and will soon be replaced, though at the time of this writing the new name has not been finalized. Updates will be posted to issue #161: https://github.com/biocore/scikit-bio/issues/161
  warn("make_identity_substitution_matrix is deprecated and will soon be "
/Users/caporaso/Dropbox/code/scikit-bio/skbio/alignment/_pairwise.py:540: EfficiencyWarning: You're using skbio's python implementation of Needleman-Wunsch alignment. This is known to be very slow (e.g., thousands of times slower than a native C implementation). We'll be adding a faster version soon (see https://github.com/biocore/scikit-bio/issues/254 to track progress on this).
  "to track progress on this).", EfficiencyWarning)

In [3]:
alignment = global_pairwise_align_nucleotide(seq_1, seq_2, penalize_terminal_gaps=True)
al_1, al_2 = [alignment.get_seq(_id).__str__() for _id in alignment.ids()]

print "    nw alignment using scikit:"
print "        %s" % al_1
print "        %s" % al_2
print


    nw alignment using scikit:
        ATCG--ATCGATCG
        ATCGATATCGATCG


In [ ]: