This illustrates how to use global alignment to find a short sequence (e.g., a PCR primer) in a longer sequence (e.g., a 454 read) using Needleman-Wunsch alignment.
In [1]:
    
from skbio.core.alignment.pairwise import global_pairwise_align_nucleotide
    
In [15]:
    
s = "ACCGTGGACCGTTAGGATTGGACCCAAGGTTG"
t = "T"*25 + "ACCGTGGACCGTAGGATTGGACCAAGGTTA" + "A"*25
a = global_pairwise_align_nucleotide(s, t)
    
In [16]:
    
print s
print t
    
    
The alignment then looks like the following. Note that in scikit-bio 0.1.4 we do strict NW alignment, which means that the alignment always starts at the last position of both sequences. I need to fix this, as it's effectively useless for this problem due to the long artifical gap that it introduces.
In [3]:
    
print a.to_fasta()
    
    
We can then slice just the targeted region as follows.
In [4]:
    
gap_vector = a[0].gap_vector()
start_index = gap_vector.index(False)
end_index = (a.sequence_length() - 1) - gap_vector[::-1].index(False)
print a[0][start_index:end_index+1]
print a[1][start_index:end_index+1]
    
    
And count the mismatches and gaps.
In [5]:
    
mismatch_count = 0
for i in range(start_index, end_index+1):
    if a[0][i] != a[1][i]:
        mismatch_count += 1
print mismatch_count
    
    
Pulling some code from skbio and iab to explore this problem
In [12]:
    
import numpy as np
from skbio import Alignment, BiologicalSequence 
from skbio.core.alignment.pairwise import (_compute_score_and_traceback_matrices, _make_nt_substitution_matrix,
                                           _init_matrices_nw, _traceback, _get_seq_ids, blosum50, _init_matrices_nw_no_terminal_gap_penalty)
nt_substitution_matrix = _make_nt_substitution_matrix(1,-2)
traceback_decoding = {1: '\\', 2:'|', 3: '-', -1: 'E', 0: '*'}
def format_dynamic_programming_matrix(seq1, seq2, matrix, cell_width=6):
    """ define a function for formatting dynamic programming matrices
    """
    lines = []
    cell_format = "%" + str(cell_width) + "s"
    line_format = cell_format * (len(seq1) + 2)
    # print seq1 (start the line with two empty strings)
    lines.append(line_format % tuple([' ',' '] + map(str,list(seq1))))
    # iterate over the rows and print each (starting with the
    # corresponding base in sequence2)
    for row, base in zip(matrix,' ' + seq2):
        lines.append(line_format % tuple([base] + map(str,row)))
    return '\n'.join(lines)
def format_traceback_matrix(seq1, seq2, matrix, cell_width=3):
    translated_m = np.chararray(matrix.shape)
    for i in range(matrix.shape[0]):
        for j in range(matrix.shape[1]):
            translated_m[i, j] = traceback_decoding[matrix[i, j]]
    return format_dynamic_programming_matrix(seq1, seq2, translated_m,
                                             cell_width)
def global_pairwise_align_(seq1, seq2, gap_open_penalty=10, gap_extend_penalty=5, 
                          substitution_matrix=blosum50):
    init_matrices_f = _init_matrices_nw_no_terminal_gap_penalty
    score_matrix, traceback_matrix = \
        _compute_score_and_traceback_matrices(
            seq1, seq2, gap_open_penalty, gap_extend_penalty,
            substitution_matrix, new_alignment_score=-np.inf,
            init_matrices_f=init_matrices_f,
            penalize_terminal_gaps=False)
    print format_dynamic_programming_matrix(seq1, seq2, score_matrix, )
    print format_traceback_matrix(seq1, seq2, traceback_matrix)
    end_row_position = traceback_matrix.shape[0] - 1
    end_col_position = traceback_matrix.shape[1] - 1
    aligned1, aligned2, score, seq1_start_position, seq2_start_position = \
        _traceback(traceback_matrix, score_matrix, seq1, seq2,
                   end_row_position, end_col_position)
    start_end_positions = [(seq1_start_position, end_col_position-1),
                           (seq2_start_position, end_row_position-1)]
    # Get ids to assign to the output sequences in the result Alignment object
    seq1_id, seq2_id = _get_seq_ids(seq1, seq2)
    return Alignment(
        [BiologicalSequence(aligned1, id=seq1_id),
         BiologicalSequence(aligned2, id=seq2_id)],
        score=score, start_end_positions=start_end_positions)
    
In [13]:
    
s = "HEAGAWGHEE"
t = "PAWHEAE"
print global_pairwise_align_(s, t).to_fasta()
    
    
In [7]:
    
    
In [7]:
    
    
In [7]:
    
    
In [7]: