In [2]:
from __future__ import division
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from types import ListType
from itertools import combinations, groupby, islice, imap
from collections import Counter
from subprocess import check_output
from operator import itemgetter
from StringIO import StringIO
import csv
import shlex
import sys
import glob
sys.path.append('/home/will/PySeqUtils/')
from GeneralSeqTools import fasta_reader, fasta_writer
In [30]:
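# Helpers for haplotype reconstruction from mapped reads: stream reads from
# SAM/BAM, collapse duplicates, build a read-overlap graph, and plot coverage.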
def unique_justseen(iterable, key=None):
"List unique elements, preserving order. Remember only the element just seen."
# unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
# unique_justseen('ABBCcAD', str.lower) --> A B C A D
return imap(next, imap(itemgetter(1), groupby(iterable, key)))
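# Given two (sequence, reference-start) reads with tup1 starting no later than
# tup2, require at least `overlap` shared bases and exact agreement over the
# shared window.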
def check_overlap(tup1, tup2, overlap = 20):
sa, Ia = tup1
sb, Ib = tup2
if ((Ia+len(sa)) - Ib) < overlap:
return False
else:
nS = max(Ia, Ib)
nE = min(Ia+len(sa), Ib+len(sb))
A = sa[(nS-Ia):(nE-Ia)]
B = sb[(nS-Ib):(nE-Ib)]
#print A, B
return A == B
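# Guess which whitespace-split SAM column holds the read sequence: skip columns
# containing flags, numbers, or reference-name characters, and return the first
# column made of nucleotide letters.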
def guess_seq_col(row):
wlets = 'ATCG-'
skip_lets = '*HIVREFX:|1234567890'
for num, col in enumerate(row):
        if any(l in col for l in skip_lets):
            continue
if any(l in col for l in wlets):
print num, col
return num
print row
raise KeyboardInterrupt
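# Yield (sequence, reference start, read id) tuples from a SAM/BAM alignment
# (BAM is piped through `samtools view`).  Reads starting before position 634
# are shifted by 9086, presumably to move 5' LTR hits onto the 3' LTR copy.
# A list of filenames is flattened into a single stream of 'join-read-N' ids.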
def get_reads(fname):
if type(fname) is ListType:
count = 0
for f in fname:
for (se, st, ind) in get_reads(f):
yield se, st, 'join-read-%i' % count
count += 1
else:
if fname.endswith('.bam'):
cmd = 'samtools view %s' % fname
out = check_output(shlex.split(cmd))
else:
with open(fname) as handle:
out = ''.join(line for line in handle if not line.startswith('@'))
reader = csv.reader(StringIO(out), delimiter = '\t')
#seq_col = guess_seq_col(reader.next())
getter = itemgetter(9, 3)
reads = imap(getter, reader)
for k, (se, st) in enumerate(reads):
start = int(st)
if start < 634: #fix the LTR issue
start += 9086
yield se, start, 'read-%i' % k
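# Collapse reads with identical (sequence, start) into a single representative
# ("IR" reads).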
def get_IR_reads(reads):
n_reads = sorted(reads, key = itemgetter(0,1))
ir_reads = list(unique_justseen(n_reads, key = itemgetter(0,1)))
print 'IR reads', len(ir_reads)
return ir_reads
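# Build a directed overlap graph: one node per read (with seq/start/end
# attributes) and an edge A -> B whenever B overlaps the end of A by at least
# 5 bases and the two reads agree over the shared region.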
def initialize_graph(ir_reads):
graph = nx.DiGraph()
for seq, start, ind in ir_reads:
graph.add_node(ind, seq = seq, start = int(start), end = int(start)+len(seq))
nover = 0
for (sa, Ia, indA), tup_rows in groupby(combinations(ir_reads,2), lambda tups: tups[0]):
end_pos = Ia+len(sa)
for _, (sb, Ib, indB) in tup_rows:
if end_pos < Ib:
break
if check_overlap((sa, Ia), (sb, Ib), overlap=5):
nover += 1
graph.add_edge(indA, indB)
return graph
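# Attach synthetic 'source'/'target' terminals: source -> every read spanning
# start_pos, and every read spanning end_pos -> target.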
def add_terminals(graph, ir_reads, start_pos, end_pos):
graph.remove_nodes_from(['source', 'target'])
graph.add_node('source')
graph.add_node('target')
starts = 0
targets = 0
for seq, start, ind in ir_reads:
end = start+len(seq)
        if (start <= start_pos) and (end > start_pos):
graph.add_edge('source', ind)
starts += 1
if (start < end_pos) and (end >= end_pos):
graph.add_edge(ind, 'target')
targets += 1
#print 'Start edges: %i Sink Edges: %i' % (starts, targets)
return graph
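# Plot per-position read depth across the reference.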
def generate_coverage_map(ir_reads, ax = None, label = None):
pos_count = Counter()
for seq, start, ind in ir_reads:
for s in range(start, start+len(seq)):
pos_count[s] += 1
x = range(0, max(pos_count.keys()))
y = [pos_count[p] for p in x]
if ax is None:
ax = plt.subplot(111)
ax.plot(x, y, label = label)
ax.set_ylabel('Coverage')
return ax
In [69]:
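# Path enumeration and consensus assembly over the overlap graph.
# simple_paths: for every node reachable from 'source' that can also reach
# 'target', stitch its shortest source-path and shortest target-path into one
# candidate path (so only shortest-path routes are enumerated).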
def simple_paths(graph):
#target_paths = nx.single_source_shortest_path(graph, 'target')
target_dict = {}
for node in graph.nodes_iter():
try:
target_dict[node] = nx.shortest_path(graph, node, 'target')
except nx.NetworkXNoPath:
pass
#print len(target_dict)
source_dict = nx.single_source_shortest_path(graph, source = 'source')
#print len(source_dict)
for key in (set(source_dict.keys()) & set(target_dict.keys())):
#print source_dict[key], target_dict[key]
yield source_dict[key] + target_dict[key][1:]
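# Majority base in an alignment column, ignoring gaps; '-' if the column is all gaps.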
def get_common(col):
try:
return Counter(col[col != '-']).most_common(1)[0][0]
except IndexError:
return '-'
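# Clip a read to the haplotype window [hap_begin, hap_end) and return the
# clipped sequence plus its start/stop coordinates within that window.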
def adjust_pos(seq, start, hap_begin, hap_end):
norm_start = start
norm_stop = start + len(seq)
ome_start = max(hap_begin, norm_start)
hap_start = ome_start - hap_begin
seq_start = max(0, hap_begin-norm_start)
ome_stop = min(norm_stop, hap_end)
hap_stop = ome_stop - hap_begin
seq_stop = (hap_stop - hap_start) + seq_start
return seq[seq_start:seq_stop], hap_start, hap_stop
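# For each source->target path, stack the clipped read sequences row-wise over
# the haplotype window and take the per-column majority base to form one
# haplotype string; only unique strings are kept.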
def assemble_haps(paths, graph, hap_begin, hap_end):
hap_len = hap_end-hap_begin
haps = set()
for num, path in enumerate(paths):
if num % 50 == 0:
print '%i paths, %i haps' % (num, len(haps))
seq = np.empty((len(path)-2, hap_len), dtype=str)
seq[:] = '-'
for row, node_name in enumerate(path, -1):
if (node_name != 'source') and (node_name != 'target'):
node = graph.node[node_name]
#print node['start'], node['start']+len(node['seq'])
nseq, hap_start, hap_stop = adjust_pos(node['seq'], node['start'], hap_begin, hap_end)
try:
seq[row, hap_start:hap_stop] = np.array(list(nseq))
except ValueError:
pass
tmp = ''.join([get_common(seq[:,col]) for col in range(seq.shape[1])])
#if '-' in tmp:
# print 'bad one!'
# continue
# for row in range(len(path)-2):
# print ''.join(seq[row,:])
# for row, node_name in enumerate(path, -1):
# if (node_name != 'source') and (node_name != 'target'):
# node = graph.node[node_name]
# #print node['start'], node['start']+len(node['seq'])
# nseq, hap_start, hap_stop = adjust_pos(node['seq'], node['start'], hap_begin, hap_end)
# print node['seq'], node['start'], nseq, hap_start, hap_stop
#
# raise KeyboardInterrupt
haps.add(tmp)
print len(haps), 'unique haplotypes found!'
return haps
In [47]:
bases = sorted(['DrexelMed.A0010', 'DrexelMed.A0017', 'DrexelMed.A0019', 'DrexelMed.A0107', 'DrexelMed.A0121',
'sim_reads', 'DrexelMed.A0017.R02', 'DrexelMed.A0041.R02', 'DrexelMed.A0107.R02',
'DrexelMed.A0220'])
methods = ['lastz']#, 'bwa', 'ngm']
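# One row of panels per sample: full-genome coverage on the left, a zoom on the
# LTR region (9086-9719) on the right.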
fig, axs = plt.subplots(len(bases),2, figsize = (10,20))
for num, bname in enumerate(bases):
for method in methods:
fname = '/home/will/DeepPipeline/Data/MappingResults/' + bname + '.' + method + '.map'
all_reads = list(get_reads(fname))
big_ax = axs[num, 0]
zoom_ax = axs[num, 1]
big_ax = generate_coverage_map(all_reads, ax = big_ax, label = method)
big_ax.set_xlim([0, 9719])
big_ax.set_title(bname)
#big_ax.set_yscale('log')
zoom_ax = generate_coverage_map(all_reads, ax = zoom_ax, label = method)
zoom_ax.set_xlim([9086, 9719])
zoom_ax.set_title(bname)
#zoom_ax.set_yscale('log')
if zoom_ax.is_first_row():
zoom_ax.legend()
fig.tight_layout()
plt.savefig('/home/will/Downloads/HIVDeepSequencingMapping.png')
In [87]:
sys.path.append('/home/will/DeepPipeline/AnalysisCode/')
import HapReconTools
In [128]:
#reload(HapReconTools)
fname = '/home/will/DeepPipeline/Data/ShoRAHruns/DrexelMed.A0010.lastz/tmp.bam'
reads = sorted(HapReconTools.read_from_bam(fname), key = itemgetter(1))
ir_reads = list(HapReconTools.yield_IR_reads(reads))
graph = HapReconTools.generate_hap_graph(ir_reads)
In [139]:
for tup in islice(graph.in_degree_iter(), 5):
print tup
In [142]:
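# Path enumeration without source/target terminals: from every node with
# in-degree 0, walk its DFS tree and take the shortest path to each leaf
# (paths of more than two nodes only).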
def new_simple_paths(graph):
nodes = graph.in_degree_iter()
for node, in_degree in nodes:
if in_degree == 0:
tmp_tree = nx.dfs_tree(graph, node)
out_degree = tmp_tree.out_degree()
for tree_node in tmp_tree.nodes_iter():
if out_degree[tree_node] == 0:
path = nx.shortest_path(graph, node, tree_node)
if len(path) > 2:
yield path
In [143]:
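# Like assemble_haps, but each path defines its own window, from the first
# node's 'start' to the last node's 'stop'; haplotypes are stored together
# with their start coordinate.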
def new_assemble_haps(paths, graph):
haps = set()
for num, path in enumerate(paths):
if num % 500 == 0:
print '%i paths, %i haps' % (num, len(haps))
hap_begin = graph.node[path[0]]['start']
hap_end = graph.node[path[-1]]['stop']
seq = np.empty((len(path), hap_end-hap_begin), dtype=str)
seq[:] = '-'
for row, node_name in enumerate(path, 0):
node = graph.node[node_name]
nseq, hap_start, hap_stop = adjust_pos(node['seq'], node['start'], hap_begin, hap_end)
try:
seq[row, hap_start:hap_stop] = np.array(list(nseq))
            except ValueError:
print node['seq'], node['start']
print nseq, hap_start, hap_stop
seq[row, hap_start:hap_stop] = np.array(list(nseq))
#raise KeyboardInterrupt
tmp = ''.join([get_common(seq[:,col]) for col in range(seq.shape[1])])
haps.add((tmp, hap_begin))
print len(haps), 'unique haplotypes found!'
return haps
haps = new_assemble_haps(new_simple_paths(graph), graph)
sorted_haps = sorted(haps, key=itemgetter(0))
ir_haps = list(HapReconTools.yield_IR_reads(sorted_haps))
print 'IR haps:', len(ir_haps)
In [132]:
def new_generate_coverage_map(ir_reads, ax = None, label = None):
pos_count = Counter()
for seq, start in ir_reads:
for s in range(start, start+len(seq)):
pos_count[s] += 1
x = range(0, max(pos_count.keys()))
y = [pos_count[p] for p in x]
if ax is None:
ax = plt.subplot(111)
ax.plot(x, y, label = label)
ax.set_ylabel('Coverage')
return ax
new_generate_coverage_map(ir_haps)
Out[132]:
In [133]:
from collections import deque
from copy import deepcopy
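# EM estimation of haplotype frequencies: a read is compatible with a
# haplotype when it is a substring of it.  The E-step (Urh) redistributes each
# read's count over its compatible haplotypes in proportion to the current
# frequencies; the M-step (Ph) re-estimates the frequencies from those
# expected assignments; iteration stops when the log-likelihood converges.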
def get_read_counts(ir_reads):
    read_counts = Counter(seq for seq, _ in ir_reads)
return read_counts
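# Total log10-likelihood of the expected assignments: sum over reads and
# haplotypes of u(r,h) * log10(p(h) * p(r|h)); zero entries are left as zero
# rather than -inf.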
def log_likelihood(urh, prh, ph):
inner = ph*prh
inner[inner > 0] = np.log10(inner[inner>0])
return (urh*inner).sum()
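# Discard reads that match no haplotype, then build the prh (read-given-
# haplotype) and urh (expected read-to-haplotype assignment) matrices, with
# reads as rows and sorted haplotypes as columns, plus a uniform initial p(h).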
def initialize(read_counter, haps):
invalid_reads = []
for read in read_counter.keys():
keep = False
for hap in haps:
if read in hap:
keep = True
break
if not keep:
invalid_reads.append(read)
print len(invalid_reads)
for read in invalid_reads:
del(read_counter[read])
print 'valid reads!', sum(read_counter.values())
sorted_uni_reads = sorted(read_counter.keys())
sorted_uni_haps = sorted(haps)
prh = np.zeros((len(sorted_uni_reads), len(sorted_uni_haps)))
urh = np.zeros((len(sorted_uni_reads), len(sorted_uni_haps)))
for row, read in enumerate(sorted_uni_reads):
        con_haps = [col for col, hap in enumerate(sorted_uni_haps) if read in hap]
if con_haps:
prh[row, con_haps] = 1/read_counter[read]
urh[row, con_haps] = read_counter[read]/len(con_haps)
ph = np.ones((len(sorted_uni_haps),))/len(sorted_uni_haps)
#ph = Ph(urh)
return sorted_uni_reads, sorted_uni_haps, urh, prh, ph
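# E-step: split each read's total count over its haplotypes in proportion to
# ph * prh.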
def Urh(urh, prh, ph):
ur = urh.sum(axis=1).reshape(-1, 1)
num = ph*prh
pr = num.sum(axis=1).reshape(-1, 1)
    nurh = ur*num/pr
    if np.isnan(nurh).any():
        raise KeyboardInterrupt
    return nurh
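# M-step: new haplotype frequencies as the mean, over reads, of each read's
# fractional assignment to that haplotype.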
def Ph(urh):
ur = urh.sum(axis=1).reshape(-1,1)
ph = (urh/ur)
    if np.isnan(ph).any():
raise KeyboardInterrupt
return ph.mean(axis=0)
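# Lowest haplotype frequency expected to be covered end-to-end with
# probability p = 0.9, given N reads of average length L over a window of
# length n: -n * ln(1 - p**(1/n)) / (N * L).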
def estimate_min_bound(ir_reads, hap_begin, hap_end):
p = 0.9 #prob of finding a haplotype
N = len(ir_reads)
    L = sum(len(seq) for seq, _ in ir_reads)/N #avg read length
n = hap_end-hap_begin
return -n*np.log(1-p**(1/n))/(N*L)
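# Alternate E- and M-steps (keeping only the last two iterates) until the
# log-likelihood changes by less than tol, or max_rep iterations are reached.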
def do_EM(urh, prh, ph, max_rep = 10000, tol = 0.001):
urh_l = deque([urh], maxlen = 2)
ph_l = deque([ph], maxlen = 2)
ll_l = deque([log_likelihood(urh_l[-1], prh, ph_l[-1])], maxlen = 2)
for rep in range(1,max_rep):
urh_l.append(Urh(urh_l[-1], prh, ph_l[-1]))
ph_l.append(Ph(urh_l[-1]))
ll_l.append(log_likelihood(urh_l[-1], prh, ph_l[-1]))
if np.log10(rep) == int(np.log10(rep)):
print ll_l[-1], rep, ph_l[-1].sum(), ph_l[-1].max()
if abs(ll_l[-1] - ll_l[-2]) < tol:
print 'reached tol!'
return ph_l[-1]
    print 'exceeded limit'
return ph_l[-1]
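# Repeatedly run EM and drop haplotypes whose estimated frequency falls below
# min_bound; stop once no new haplotypes are excluded and return (haplotype,
# frequency) pairs sorted by frequency.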
def estimate_freqs(read_counts, haps, min_bound):
exclude_haps = set()
last_len = 0
for rep in range(len(haps)):
        print rep, 'excluding %i haps but keeping %i' % (len(exclude_haps), len(haps-exclude_haps))
sorted_uni_reads, sorted_uni_haps, urh, prh, ph = initialize(deepcopy(read_counts), haps-exclude_haps)
nph = do_EM(urh, prh, ph)
for p, hap in zip(nph.flatten(), sorted_uni_haps):
if p < min_bound:
exclude_haps.add(hap)
if len(exclude_haps) == last_len:
print 'done!'
return sorted(zip(sorted_uni_haps, nph.flatten()), key = lambda x: x[1], reverse=True)
last_len = len(exclude_haps)
In [136]:
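# Estimate haplotype frequencies from the raw BAM read counts, using 1/100 of
# the theoretical minimum detectable frequency over the 7000-9000 window as
# the exclusion cutoff.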
read_counts = get_read_counts(HapReconTools.read_from_bam(fname))
thaps = set(seq for seq, _ in ir_reads)
res = estimate_freqs(read_counts, thaps, estimate_min_bound(ir_reads, 7000, 9000)/100)
print res[0]
In [34]:
from tempfile import NamedTemporaryFile as NTF
from subprocess import check_output, check_call
import shlex
import os
from concurrent.futures import ProcessPoolExecutor
import csv
from StringIO import StringIO
from itertools import islice
from functools import partial
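# Write the query sequences to a temporary FASTA and blastn them against the
# prebuilt database at db_path, returning the CSV hit rows
# (qseqid, sseqid, pident, nident).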
def check_seqs(db_path, seqs):
cmd = "blastn -db %(db)s -query %(q)s -outfmt '10 qseqid sseqid pident nident' -num_threads 20 -max_target_seqs 1"
fields = ['SeqA', 'SeqB', 'pident', 'nident']
dpath = '/home/will/tmpstuf/haptest/tmpseqs/'
with NTF(suffix='.fa', dir=dpath, delete=False) as check_handle:
fasta_writer(check_handle, seqs)
check_handle.flush()
os.fsync(check_handle.fileno())
tdict = {
'db':db_path,
'q':check_handle.name
}
cmd_list = shlex.split(cmd % tdict)
out = check_output(cmd_list)
reader = csv.DictReader(StringIO(out), fieldnames=fields)
return list(reader)
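# Chunk an iterator into successive lists of block_size items.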
def yield_blocks(iterable, block_size):
block = list(islice(iterable, block_size))
while block:
yield block
block = list(islice(iterable, block_size))
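# Build a nucleotide BLAST database from seqsA with makeblastdb, then BLAST
# blocks of seqsB against it in parallel (here the first 20 blocks of 200
# sequences each, across 5 worker processes) and print the hit count per block.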
def blast_all_v_all(seqsA, seqsB, block_size=20):
dpath = '/home/will/tmpstuf/haptest/tmpseqs/'
with NTF(suffix='.fa', dir=dpath, delete=False) as db_handle:
fasta_writer(db_handle, seqsA)
db_handle.flush()
os.fsync(db_handle.fileno())
cmd = 'makeblastdb -in %s -dbtype nucl' % db_handle.name
cmd_list = shlex.split(cmd)
check_call(cmd_list)
align_func = partial(check_seqs, db_handle.name)
check_iterable = islice(yield_blocks(iter(seqsB), 200), 20)
with ProcessPoolExecutor(max_workers=5) as pool:
res_iter = pool.map(align_func, check_iterable)
for num, block in enumerate(res_iter):
print num, len(block)
In [35]:
blast_all_v_all(sA, sB)
In [5]:
with open('/home/will/tmpstuf/haptest/DrexelMed.A0107.R02.fa') as handle:
sA = list(fasta_reader(handle))
with open('/home/will/tmpstuf/haptest/DrexelMed.A0107.fa') as handle:
sB = list(fasta_reader(handle))
In [19]:
sA[:5]
Out[19]: