In [1]:
import pysam

In [39]:
counter = 0
with pysam.Samfile('../outputs/moleculo/LR6000017-DNA_A01-LRAAA-AllReads.unmasked.sorted.bam', "rb") as samfile:
    with open('../outputs/moleculo/reads_lt_90_2.fasta', 'w') as f:
        for record in samfile:
            reclen = len(record.seq)
            if (len(record.aligned_pairs) < .9 * reclen) and (reclen > 9000):
                f.write('>' + record.qname + '\n')
                f.write(record.seq)
                f.write('\n')
                print(record.qname, reclen, len(record.aligned_pairs), record.aligned_pairs[-1][0] - record.aligned_pairs[0][0])
                if counter > 50:
                    break
                counter += 1


('Read_50775-Barcode=BC093-PIPELINE=Develop_T40', 9910, 8266, 8265L)
('Read_51334-Barcode=BC097-PIPELINE=V41', 9716, 7190, 7189L)
('Read_73904-Barcode=BC139-PIPELINE=Develop_T40', 9104, 7063, 7061L)
('Read_136857-Barcode=BC257-PIPELINE=V41', 9714, 4597, 4594L)
('Read_120202-Barcode=BC227-PIPELINE=Develop_T40', 10607, 8289, 8287L)
('Read_185111-Barcode=BC351-PIPELINE=Develop_T40', 9132, 5478, 5476L)
('Read_152279-Barcode=BC287-PIPELINE=Develop_T40', 11097, 8347, 8331L)
('Read_45922-Barcode=BC084-PIPELINE=Develop_T40', 9300, 7742, 7689L)
('Read_171528-Barcode=BC325-PIPELINE=Develop_T40', 10764, 8677, 8673L)
('Read_114527-Barcode=BC211-PIPELINE=V41', 10426, 9169, 9167L)
('Read_53278-Barcode=BC100-PIPELINE=V41', 9122, 5003, 5002L)
('Read_76481-Barcode=BC141-PIPELINE=V41', 10078, 6684, 6651L)
('Read_203089-Barcode=BC377-PIPELINE=V41', 9772, 5852, 5850L)
('Read_154072-Barcode=BC282-PIPELINE=V41', 9656, 7962, 7954L)
('Read_97467-Barcode=BC183-PIPELINE=Develop_T40', 9449, 6340, 6326L)
('Read_152367-Barcode=BC284-PIPELINE=V41', 11155, 6635, 6630L)
('Read_85130-Barcode=BC158-PIPELINE=V41', 10032, 5099, 5094L)
('Read_151503-Barcode=BC286-PIPELINE=Develop_T40', 10175, 5330, 5329L)
('Read_84538-Barcode=BC158-PIPELINE=Develop_T40', 9455, 4904, 4894L)
('Read_161842-Barcode=BC306-PIPELINE=Develop_T40', 12025, 7997, 7994L)
('Read_70464-Barcode=BC133-PIPELINE=V41', 9467, 7594, 7592L)
('Read_58472-Barcode=BC107-PIPELINE=V41', 9026, 6808, 6802L)
('Read_85730-Barcode=BC159-PIPELINE=V41', 10541, 9000, 8985L)
('Read_154012-Barcode=BC295-PIPELINE=V41', 9716, 5754, 5752L)
('Read_94517-Barcode=BC177-PIPELINE=Develop_T40', 10267, 7142, 7127L)
('Read_139173-Barcode=BC263-PIPELINE=Develop_T40', 9917, 7989, 7985L)
('Read_95040-Barcode=BC176-PIPELINE=V41', 10334, 6599, 6592L)
('Read_107741-Barcode=BC200-PIPELINE=V41', 9543, 6770, 6759L)
('Read_152938-Barcode=BC289-PIPELINE=Develop_T40', 9269, 6391, 6388L)
('Read_36174-Barcode=BC067-PIPELINE=V41', 9869, 8504, 8503L)
('Read_105857-Barcode=BC199-PIPELINE=Develop_T40', 11038, 9025, 8999L)
('Read_203168-Barcode=BC372-PIPELINE=V41', 9453, 8181, 8180L)
('Read_123396-Barcode=BC239-PIPELINE=V41', 9549, 7520, 7515L)
('Read_29443-Barcode=BC057-PIPELINE=Develop_T40', 9219, 4824, 4819L)
('Read_25839-Barcode=BC050-PIPELINE=V41', 11281, 7743, 7740L)
('Read_88322-Barcode=BC167-PIPELINE=V41', 10540, 7475, 7465L)
('Read_201006-Barcode=BC373-PIPELINE=V41', 10258, 8215, 8203L)
('Read_58402-Barcode=BC107-PIPELINE=Develop_T40', 9597, 8374, 8367L)
('Read_188183-Barcode=BC344-PIPELINE=V41', 9036, 4919, 4903L)
('Read_529-Barcode=BC001-PIPELINE=V41', 9639, 5613, 5598L)
('Read_138723-Barcode=BC262-PIPELINE=Develop_T40', 9506, 6615, 6614L)
('Read_57097-Barcode=BC105-PIPELINE=V41', 10733, 5896, 5894L)
('Read_44479-Barcode=BC085-PIPELINE=Develop_T40', 10211, 6515, 6514L)
('Read_106526-Barcode=BC198-PIPELINE=V41', 10560, 9263, 9217L)
('Read_135523-Barcode=BC256-PIPELINE=Develop_T40', 11072, 6121, 6119L)
('Read_139355-Barcode=BC255-PIPELINE=V41', 9016, 5883, 5875L)
('Read_73208-Barcode=BC138-PIPELINE=V41', 9908, 5677, 5675L)
('Read_119899-Barcode=BC231-PIPELINE=V41', 9491, 8141, 8140L)
('Read_155578-Barcode=BC284-PIPELINE=V41', 9791, 7799, 7797L)
('Read_89485-Barcode=BC166-PIPELINE=V41', 9833, 5656, 5640L)
('Read_17252-Barcode=BC032-PIPELINE=V41', 9083, 7160, 7159L)
('Read_176805-Barcode=BC323-PIPELINE=V41', 9616, 6703, 6699L)

In [34]:
counter


Out[34]:
1775

In [19]:
import screed

In [25]:
db = screed.ScreedDB('../outputs/moleculo/LR6000017-DNA_A01-LRAAA-AllReads.fastq_screed')

In [40]:
db.keys()[:20]


Out[40]:
[u'Read_200587-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200588-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200589-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200590-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200591-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200592-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200593-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200594-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200595-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200596-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200597-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200598-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200599-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200600-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200601-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200602-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200603-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200604-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200605-PIPELINE=V41:SPLIT=2/3-File=5_500',
 u'Read_200606-PIPELINE=V41:SPLIT=2/3-File=5_500']

In [27]:
len('GTGGAGGAGAAGGACCTGGGGGTGTTGGTTGGCAGCTGAACACGGGCCTCAAGTTGTGCCAGGAAAGGTTTAAATTGGTCATTAGGAAGAGTGCTTCTGTGGACAGGGTGGTGAGAGGCTGTGATGGGCTGCGCCAGGCAGCAGTGCTGTCACCACCCCTGCAGGTGTTGGAGAGATGGTGGATGTGGCACTGAGGGATGTGGCTTAGGGGAGCTTGGGGGTCTGACAGTATCAGCTGGTGGTTGGACTTGATTTTAGAGGTCTTTTCCAACCTGAATGGCTCGATGGTGCTAATGGCTAAATAGGAATTGCAAGCACCGGGCTGTTTCTGCCCCAAGTCAGTGAGACCAATGCGTGTGTGCTGACCTGGCTTTGGGACAGTAATTCTGCTGTTCCTGAGTTCAGCCTCTGGGGTGGTGCAAAGGTGATGGAGCCGAGCTGCAGTGGTGCCCGTGGGTGGGATGGGGCAGTGGGCACAAACTGGAATGCAGGAAAACACCTCTGAACACCGTGAACTGCTCATCTGCTGTGTGGTGACGGAGCCTCAGCACAACCCAGAGCGGCTGTGGAGTTCCCCTCCTTGGGAAGCTGTCTGGAACTGATCTGGAGTAGAGGTGGTGCGCATTTGTGCCTCTGAGAAGCTGTGGCCTCGTGTTGAGTGGGAAGAACAGATTCACCCTCCTCAAACACTGCTGATGGTCACAGAGGCATTTTCATTTTCCCTGTCTTTAAATGAGTCTTTTCAGTTGCCGGCTTGGTGAACAGTGAATGGTGAAAGAAGAAATTTCCCTGTAAGCCTGTGCTACTTTGGAGCTCCACGGCGGCTGCAGGCCAGCTAAGATGCCTGTGAAATACTGGCACTATGTTTGCTGGCTGTTAAGCTGATACAGAACACGTGCACTCCTAGCTTTGTTGCAGTGTCCTTCACCAGAAGTGTTATCTCTAGAACAGGAGTCCAAGGCCTGCCTTCTTGCTTCACTGATTGCTTCCCAGGAATTCTGTTCTGCAGGCTGCAGAGGCTGAGATTGTGCCATAAAGGCATTAGAAATCAGTGTGTGTGTTTGCAGGGACGAGAGAAATCGTGTTTTCCAAACGAGGAAGACTTTTCACTTTCTATGCTTATTGCAGATGAGTGATTACGATAGACCCAGAGCGTGCTGTGTAGGAAGGAGAGAGGTTTTTTATCTCCTTACTGTCAAATAAATTGTTTTTCTTCTCTTCTGCCTCCCAGAACCCACGAGATAAACAGGAATGGTGTGAAGGAGCTGTGTGTGAGCAGCAGCACTGCCCTGGAAACCTCCCCCCTACTGAGCAAGCAGACCTATGAGCTCCTCAGTCCATGAAGGAGGTTGGTGTTCTCGTGGCCTTCTTGCTTCCTGATAGAGAAACGCAATTAGCACACTTAGTTTAAAGCCAGGTAGTTGGAAGCTGTGACTGCCACGCAGAAGTGATGCAGTGGTATTCTCTTCCTTGTTGGTGTTTCCATGCTTTACGTGGCAATGTGAGAGAGGAAGGGTTTGTTTGCAGAGATAAAGGAGATGGAGGATGAGCGTATGGCCAACTTGAGAAGTCTCCTTCTGGCTAAACACATTAAAACTTGATGTAAGCTTGAGTTTGTATCTAGCTTAATACTTGCGTGTCCTGCTGCAGTCAGGCTACAGCAGGGAGAACTGTAAGCAAACTAAAAGCTCCTTCTGCCTCCTCCCTCCGAGCGGTGTGGGGGAATGGGAGTCACGTGCAGTTTGTAATGCTTCACCTCTGCACTCCTTCATCCTCGCTCTGCTGTGAGGTCGTTCCTATGGGATGCCACCCTTCCCAAACTGATCCTGCCCAGGCTTCCCACAGGAATGGATTTGCTCCAGTGCGAGGCCCCTCAGGCTGCAGCTCCTGCCCAATGATGTGCTCCAATGTAGACTTAACTGCATGGACTGCACTCAGGCCTTGGGCCAACTCCTGCAGGGCCTTTCCGTGGGCTGCAGCCTCTTTAGGCCACATCTGCCTGCTCTGCAGCGGGCTCCTCTCCACAGGCTGCAGGGAACTTCTGTTCCATGCCTGGAGCACCTCTTGCCCTCCTCCTGCACCCACCTTGCTGTTCTCTCAACATTCTCTCCCTTCTGTCTCTGAGCGGTGCAGCATTTCTTCCCCTTTCTTCAGTCTGCTCTCCCAGAGGTGCGCTCAGCATCCGGAGCTGTCTGTGCCATGAAGCAGCTCCTGCACTCTGACAGAGGCCACCCCAGCAGCCCAAACCTTGTCACACAAACCCGATGCAATTCCTGAGGTTGTTCTGTTCTTTTGTGGTTTAACCCTGGCAGGCAGCTCGGCACCACACAGCTGCTTGCTCTCCGTCCCTAGGTGGGATGGGAAAGAGGATTGGAAAGGTAAAAGTGCGAGAACTCAATGAGCTGAGGTAAACCACTGTGTTCCCTTGGCAGAGGAGTTGTATTCTCCGTAAGCCTGATCAGTGTGGAGGGATGCCTTTTTTTCCCCCAGCAGTTTTGGCTGTATCCTGACTCTTTGCTCTGTTTCAGGCTGAAGATGCTGAGCCGTTTGTCAGGTTTGGCGAATACTGTTCTACAAGAGCTGTCTGGTGATGGTGGAGATGCAGTTACTGACTCTGAGTCCTCTGTCACTGTAAGTACGCTGTAGTGGAAAGACTTCCTGGCCTGACGTGGAATTCGTTTTGGGAGTATAAACCATCTGGCTGCAGTGAGAACTGTTTCCTCCTTCACTGCTGAGTGCTGAAGTGAACTACTGAGCCAATGTTCCAATCTGCTTCTGTATTCCCAACATCTTAAGTCAGCGCTTACGTGGGGCAAACCAGAATCTACTTCAGTTTCTAAATCCCAAAGTGATGCTAGAAAAAGCATTAGAATGAGAATGAAGGAAGATTTTCAAGTTTCAGTGGTTCTGTACAGCAGGAAGGGGAGCTCTGCACAGTTGGGTGCCAGGAGAAGACTTTGAGCTGCATGCTGCCCTTTAGCACCTGGGGCAGTGCAGAGCACATGAGTGGTGTCATCTCCTGAGAGCCACTCACTCCAGCTTTCCTGTGTTCATTGAAGATGCTTGCATTCTTTGTTTGTTTGTTTTGTTTTGTTTTCCCCCTCCTTATACATAGTATCATAGAATGGCCTGGGTTGAAAAGGACCACAATGCTCATCTCGTTCCAATCCCCCTGCTATGTGCAGGGTCGCCAACCAGCAGACCAGGCTGCCCAGAGCCACATCCAGCCTGGCCTTGAATGCCTGCAGGAATGGGGCATCCACAACATCTTTTTTCCCAAGGATCTTTTCTCACTGTTTCACGTAATCGTATGTAGTTCTGTAGGCTGCATGAAATGTTTGCAAGGAACAAGAAGGCAGCGTTTAGGAGCGGACCTGAAGGTGACATCTGGATCTGCGCTGCGGTCTGTTTGGACTTTCCTCCTCCAGAGCACTCATCTCACTGTTTCAGGTAGAGCCTTCAGAGCTGGGAGGAGGAGGCATGGAGGAGGCACCGGAGGACGTGTTGGAACGGTTGGCACACACTGAAAAGCTGGTTGTGCAGCTGAAGGATTTGATCCGAGAGAAGGATGCCCTGCTCCAGCAAAAGGAAACTGTGCTCAAGGTATCATCTCTTTAACTGAGGGCCTAACAGCAAGTGTGAAATGCTGTGTCACCACCTTTCTTTGCAGGCCACTTTGTGCTTTTTTTTCTGCCACTGAGATGCCTGCACTACTGAAGGCATCGGTTTCGCACGGTGTTGGGATTCCAAAGAGTGGGCCCCAACCTAAGAGATGTGGGGTTGCTTCTGTGCCCTTTGTTTAGGTGTAGCTGCCTCTAAAGTAAAGACACGGCATCCACGTGCCGTGCATGTTTGGAGTTGTCATTGCTCTAATAAGTGAGAATATCTCTTATGTGAGTGGCTTTTTTTTTTCCTCCAGTTCAGAGCATTTGAGACCTACCAGGCTGTGCTGCAGCCTCCCAGCAGTGCAGTGGTTTATGGCATTTAAACAAATGTATGTACTGCTTATCCTGAGGCATCGCAGGAGAGGAAGTGCTCAGGGACTGCATATTGTTGAGGAAGTCAAATTTAGCTTCTTCTGACTGCTAGGAGATGTCCCTTGAGTGGTCCATCCCCTGGAGAGAAACTGGCATGCAACTATTTTGCTGTCTGTAGCGCTTCAGGGCAGGTTTGTTTCCTGAATGTTTTAATTAGCATTGGTAGTGAGAGCGGTTTGGGCAAAGAAACAGACTGACAAAACTGCAGCTTTTATGAGAGGTGCTTTCAGGGTGATGGACAGCACTGGGATGTTAAGCACAGCCCGTTGGTGTTCTTCTCTTCCAGGAGGAACGCGAAGCTGCGGATGCAAAGCTGATGAAACTGAAACTTCAGGCCAAAGCCAAGCTGGCATCTCTGAACAAACGCATCGAGGAGCTGACAGAGAAGGGCCCTCTGCTGCCTGCACAGGCCGTGCCAGAAGAGCAAGTGTGTGCTAAGGTTGGGAGAGCAGGGCTACTTCAAAGGTCTGGGGTTGCCCTATATTGTAGGTCCCGTCTCTCTGTTGTTAGCAGGACATTCATAGACCCAGCAAGGATATCTGCTGCAGGAGGCTGATCTGTCCAGCTTCTCTGTACTGCAGACCTCCTTGGGGTGCACAGCTTGATCTCCTCAAGGGTGTGGGTCAGTTGCATGTTGTGTATCTTCAGTGCTCTCTTACAGTTAAGCACTTGCAGAAATCACATCTGTGTGTGAGCTGTAGGTTTCCAAAGGTTACCTGTTCTGGTAGCACAGTGAAAGAAAGGGGAGGTAGGCTTGAATACAAGACAGAAGGAGTAATTGAAAGGCAGTAACTGAAACGTAAGATGGTATTTCTGAGCCATGTATCTTCCTGTGCCTCTGCAGTGTGTCGTGGTAAGCAGGAACTGTTTATGTGAACCTGGGTAAGAGTAGGGGCAGGAATGTCCCATACCAGGACGTTAAAGAAGCTGCTGACTGCTGGTTAGGAGCTACCACAGGCATTGCTGAGTTTATGAGAACTTTGGAAATTGTTCTGCTGACCACATCATCTTTTTGGGAAAAACTTTGTATGAGTGGGTAAGAGGAGTGAGATAGGAGCCCAGAGAGGTCACAGCACGTTGGGTGATCTTCAGTCAACCAGGGAGGATAAAAGTGATTAGAAACACTGGGGAGAGAAAAGAAGAAGTTAAGAATTATAGGGAAAGTCAACAGGTGAGTGTGAGTGTATGAAAATGTGAGCTTTTAGGGTAGGGGTGGTCACTGATTGAAGAGCACCTGAGAGGCAATCCTGAAAAGACGTTGATCAAAGGTTTGTGGGTTCACCAGAGGCAAGCCCTGCTTCACAGACTGCATCTTCTTCTAAGATGAGATCCCCACCCAGCTGACTGAGGGTTGGTGTCTGTCTTTTGGATTCCAGTATAACTTTGAGGCTGTTTCTCACGGTATCCTTTGGACAAAGCATCCAGCATACAGGTAGCCAAAACCACAGCGCAGTGGGTGAGCAGTTGGCTGACAGGTGGGGTTCCGTCAGGCTGCAGGGGTTCTGCAGGGCTCTGTTTCAGGGCCACGTCTCTTGGATGCTTTCGTCAATGACTTAAATGTCTTGAAGGTGTGCTGAGTAGGTTTGTGACAGCGCTGAGCCAAGCTTAGAAGAAACACAGTTTCCTCTTAGGGCTGTGACAGGAAGGTCGGCACCAAGAAGGTGGCCCAGCTGTTGAAGTGGGAGGGCTTTACTGGCAAAGAAATGAAGTGAGAAAGAAAAAAAGAGGTTCTTTTTTTTTTAAAGTAGTAAAAAATAAGCATATGGTTTGAAGTCAAATGCAGTAGTGTTTGGTTTTTGGTTTTTGTTTTAAATGGACAGTAATAATAATAGTTTTTTCTTATGTTTCTTCCTGATTCTTTCTTGCAGTGTCAGAATAGCCAAAATGCAGGAGAAGAGCACAGGGACAAAGCTGAGGGTCTGGAAGAGCAGCTCAGGGAGCAAGAAGAGGCTGTTAAGGATCTGAAGGAGCAGCTGGCTTTAGCCAAGATGAATCTGAAAGATGCTGAAGTCAAATATGCATCACAGGTACATGGAAAAAAAAAAGGCTGTTTTTTCTCTAATTGTGTGTTTCTTACCCCACCACTTACACTGATGTAATTCATGAGAGAGGGTTATTCGTGGTGACGCTGCTGCTGCTGCTGGTGGGGATGTGCCCAAATCCAGCAGTCTTGGCAGAAACCAGACTACCAGGGCTCCAGCAACCCAGGTCCATCCTGAGGGCATCGTTCCTGGCAGCTGTGTGTGCACAGACAGCCATACAGCACGGCACTTGTATGAGAGCACACAAAGTACCCATTTTTCTCTCTGTCAGCTCAGATGAGAGAACAGTGAGAATATGCACAGTAGCAGGTTTGGATCCTCCCAACAGCTGTGCTCAGGCACTCAGTCTGCCCAGTAACCAGCCCTTCAACCCCAGTCTCCCCAGTTCCTGGCATGTGGATGTGCAGAACCACAGGGCTTTTGGTCCTGTTCCACCTGTAGGCGCAGACCTGCTGGTCTTGCACACGTGCACAGAGCTGACTGGGATTCCCCTGGTCCCTCTAACAGCTAGCCCTCTGTGCTGCTCTCCTTGGAGAGGTGCAGATACACACAGACCCTCATAATACACAGCTGCACACCCTTCTCTCTGCTAACTGGCTCCCTGCTAACTGGCTCCCTGCTGTCTCCCCCTCAGAGATGTGGAAATACACAGAAGCAGGCCTTGCTTTTAGAGCCCCCCGAGGTGCTGCGGTCCCTCTGGTATAGCTGGAGCTCCCTTGGCCATCCAGCAGCCAAGCCTGCTTGTGATATTGACCTTGGGTGCTATTCCCCATCTTCCCCTTTTGAGGGAGGGGGTCTGCCACTCTGCTCTGTGCTGGGACACCTCACCTGGAACACTGCATCCAGATGGGGAGTCCTCAGCACGGGAGAGACACAGACCTGTTGGAGTGCATGCAGAGGAGGGCCACAAAAGTGATCCAAGGGATGGAACACCTCTCCTATGAGGACAGGCTGAGAGCTGGAGCCGTGCAGCATGGAGAGGAGAAGGCTGCGAGGGAACCTGAGAGTGGCTTGTCAGTATCTACAGGGGAGCTGCAGGAAGGAAGGGGACAGACTATTGAGCAGGGTCTGTGGTGATAGAACAAGGGGAAGAGGCTTCAAGCTCAAGGAAGGGAGGTTTAGGCGGGATATAAGGAAAAGGTCTCACACAGTGAGGCTGGTGAGGCACTGGCACAGATTGCCCAGAGATGCGGTGAGTGCCCCATCCCTGGAGACTTTCAAGGCCGGGCTGGATCAGGCCCTGGGCAGCCTGATGGAGCTGTGGTGTCCCTGTGCGTTGCAGGGGAGTTGGGCTAGGTGGCCTTTAAAGGTCCCTTCCAACTCCTAAGGATTCTGTGATTGTGCTTGCCACGTCGTCTGGCTTGGAGTTAGCTGGTGGCGATGCATGCAGCTGCATGCTCTCACTTGCTCCAGTTGCCAGCACCACAGGCACGTGAGCCCTTTGGCCCCCAGCCTGACTCTGCTGGGATTTCCCCTCGGAGGACAGAAAGTGTTTCCCACAGAAAAGAGAAAGAATTAGAGGTAGAATTCAGCGAGACACAGCGCTGATCAGATGCAGGGTATGGCCAGGCAGCCGTGTCCGCCAGCTGCTGTTACACACAGCTTGCTTTTTTAATACCATTGCCATCTATTTCGTGCTTGCTCCTCCCCAAACATCCTAGATCCACGCACTTCCATGTTTTTGGTTCTTCCTCCAAACACCCCACAGTCAGTCTGTGCAGTCCCAGAGCATTCCTTGGCTGCTTCCCACTCTGTATCCCTCAGCAGTATCCACCACTTCGTGCAAGAGGTGCTCAGCAGTCCTCTTACTGACTTTCCCTCTTGGATAATGAGAGAGGTGGCTCTCTGAGCAGGAGGTGAGGAAGCAGCCCCTGGATGGGGCTGGACAGGGAAGCCCTGGCTCTGAGTTGGGATTTGGGCTCCTGCCTGCTGTTGTGCTTTGGTGGATGGGTTTTGGCATCTTATCTCTGTCCTGGGGTGAATTTTGGAGCTTCTCCCATCCCCACCATTGCCCCTCTCTGCTTTGCTGGGGATGAGCAGATGAGATTGTTATCTCATAGCAGCTGCTGCTGTTGTGGTCATTTATGGAGCGGTTGATGTTAGTGCTCTGGATGTCAGTTTTAGCTTCTGATCCTGGATCCTGTGCAAATACAAAACCAGAAGCTTTACCAGCTCGTAGTCACATAGCCATTATGTTTTATCATTCTGCATATTTATAAATGAAAATCATACGTTCTTTGCTATGTGGGATCTGAACTATCACCAGTTAATGTCTGTGTGTTCATAACTGATGAAGTTATTGTCTGACCAAAGCCACACAGAGGGTTGCCAGGCTGCAGACATCTCCATACACGTGTTGTGTGCTGATGCTGAAAGCACTCCTGCGCGTCTGTCAGCTCATAACCCTCTTGGGTTATGTTTGCTTGTTTCTTTTTCCTCTCTGGGTCCTGTATTGTGGTTTCTGTTTTCAGCTGAGCTCGCTGCAGGAAGTAATTCAGGAGAAGGAAGCTCTCCTCCAGGAGCAGGCTCACCAGCACCAAGCTGAACTGCTGAGAACAGCAGCCAAGGCAGATCAGGAAGCAGAGGTGCAGCAGGTATGTTTGTTATAACCACCTTGTTATGAAGAGGAGAAAAGAAAATGCGTTTTATGAGTCAGAGGATGCGCTGGGAAAGCTGTTCTCAGCAAAGGGAGCAGGGCTGAGAAGAGGTGGGAGCTGATGGCTTGGGGCTGTTCGGTTGCTTTTAGAAGGATATTTCACGCTTCCAGAGTTCCCCCTTCAGTCTGCTGAGCTGCTGTAAGACTTCCCTTGCTGCAGGTAGGAATGTGGTTATTGTTGAATGCAGTGGCAATGCTGCCATTCCATGTTTGTGAAACAGCGGTGAGCTCTCCTTGCTGCACTTTGGTTCCCTGGTAAACATCAATAGGAGCAAAGTTTTTTGGGCCATTCAAGTAACTGTTGCAAGAAGTGGGTGGGTGGGTGAAACTTAGGGGATGTGGTCCTTTTGCAGTAGCTTCTCAGGGACTCTTACAGACCCATCTCCCACAGAACCTGCGTACACTGCAGAGGAAGCTGGAGGAGCGAGAAGAAGCTCTGCTGGGACAAACGCAGGCAGTGGAACTGCTGCAGCAGGAGCTACGTGAGGCTGAGCAACAAAACCAGGTGCCCTTTTGTGTGCTTGTTGCTTTCTCCCATGTGTCTGAATGGCTGGAGGAGCTTTGTACTGACAAAGCATGGAAGTTCCCAGATGGGCTTATTGCTGGCACAAGGATTGCAGCACGGAATCAGCCAGCAGGTTCATTAACATACTCCTTCCTTTCAAAGAAGAGCACAGATTTAATTGCAGCAGAAAAGGAAATAAAATGAATACACTACGCAGAGAATGTTTCAGTAATAATATTATTGCCCACATCTTCTGATGTCTGCCTCTATTTCTGGCCTTATGGTCCCCCCCACAACATTCATGGCTTCCATCTGTTTGAGGATGAGGGATACAGCGAGCTGTCAGCACCGCAGACCTCCAAACAGGGGGGATTTGGTGTTTGTGGCTCATTCCTCGACCCAGGATCCACTTGGAGCCATTTTTATGATTGCTGAGATGGTTTTACACCTCTTTGTTAGTCCACAAGTTCATGCTGTGTCTGATT')


Out[27]:
9725

In [19]:
!ls -lah ../outputs/moleculo/unmapped_reads.bam


-rw-r--r-- 1 irberlui ged-lab 5.1M Apr 18 15:49 ../outputs/moleculo/unmapped_reads.bam

In [18]:
%%bash
module load samtools
samtools view -b -f 4 ../outputs/moleculo/LR6000017-DNA_A01-LRAAA-AllReads.sorted.bam > ../outputs/moleculo/unmapped_reads.bam


application-specific initialization failed: Can't find a usable init.tcl in the following directories: 
    /opt/anaconda1anaconda2anaconda3/lib/tcl8.5 /usr/lib/tcl8.5 /lib/tcl8.5 /usr/library /library /tcl8.5.13/library /tcl8.5.13/library



This probably means that Tcl wasn't installed properly.


Lmod Warning: samtools not found, loading: SAMTools/0.1.19