import pysam

# Minimal pysam scratch example: open test.sam for writing with a header that
# declares a single reference ("testchr", length 100), then build one read.
# The four statements were previously collapsed onto one line, which is a
# SyntaxError; they are split back into one statement per line.
# NOTE(review): the read is never written to `samfile` and the file is never
# closed in this snippet -- confirm whether that is intentional.
samfile = pysam.Samfile("test.sam", mode="w", referencelengths=[100], referencenames=["testchr"])
read = pysam.AlignedRead()
read.seq = "AAAAAAAAAA"
read.pos = 2



In [ ]:

    
#This code executes manual unit testing of the sequtil by Ali.



In [23]:

    
# Reference sequence from which the synthetic test reads are sliced.
# It is 700 bases long (ten 70-character rows; see the len() check in the next cell).
# NOTE(review): presumably the first 700 bases of NC_000913.2, since reads sliced
# from offset 20 align at position 21 in the outputs further down -- confirm.
sequence = (
"AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
"TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA"
"TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC"
"ATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAG"
"CCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAA"
"GTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC"
"AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTG"
"AAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTT"
"GACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTT"
"GCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGC"
)



In [2]:

    
# Sanity check on the reference length; the recorded output for this cell is 700.
len(sequence)









    Out[2]:





700



In [24]:

    
# Output to see what "reversed( sequence )" looks like.
#for i in reversed( sequence ):
#    print i



In [3]:

    
#Used in establishing values for the reverse strand of 'sequence'.
def reverse_complement(seq):
    """Return the reverse complement of the DNA sequence *seq*.

    The sequence is read back to front and every base is replaced by its
    Watson-Crick partner (A<->T, G<->C).  The ambiguity code ``N`` maps to
    itself, and lowercase (soft-masked) bases keep their case, so such input
    no longer fails.

    Parameters:
        seq: a string (or other reversible sequence) of single-character bases.

    Returns:
        A new string with the reverse-complemented bases; ``""`` for empty input.

    Raises:
        KeyError: if *seq* contains a character that is not A, C, G, T or N
            (in either case).
    """
    matches = {
        "A": "T", "T": "A", "G": "C", "C": "G", "N": "N",
        "a": "t", "t": "a", "g": "c", "c": "g", "n": "n",
    }
    return "".join(matches[base] for base in reversed(seq))



In [4]:

    
#Creates output fastq file. Inserts into output file 10 sequences that are 35 bases long from the original sequence.
#Each sequence is a shift to the RIGHT by 1 of the previous sequence.
#The '+' separator line may optionally repeat the read ID; the ID is not repeated here.
#Quality characters are ordered by ASCII value: '!' represents the lowest quality while '~' is the highest.
#This essentially creates 5'-> 3' reads of 1 strand (the slices are taken left to right from 'sequence').
# Write ten FASTQ records; record k holds the 35 bases of `sequence` starting
# at offset k + 20, with a constant top-quality ('~') string.
with open("test_SE_plus.fastq", "w") as fastq:
    for shift in range(10):
        begin = shift + 20
        record = [
            "@TEST_%d\n" % shift,                 # read identifier line
            sequence[begin:begin + 35] + "\n",    # 35-base window of the reference
            "+\n",                                # separator line
            "~" * 35 + "\n",                      # one quality character per base
        ]
        fastq.writelines(record)



In [5]:

    
#Creates output fastq file. reverse_complement() is used. The same type of file as that above is created, though with the
#difference that it's created on the reverse complement of the original sequence.
#This essentially creates 5'-> 3' reads of the reverse strand in relation to the reads created above.
# Write ten FASTQ records taken from the reverse complement of `sequence`;
# record k holds the 35 bases of `rev` starting at offset k + 20, with a
# constant top-quality ('~') string.
with open("test_SE_minus.fastq", "w") as fastq:
    rev = reverse_complement(sequence)
    for shift in range(10):
        begin = shift + 20
        record = [
            "@TEST_%d\n" % shift,            # read identifier line
            rev[begin:begin + 35] + "\n",    # 35-base window of the reverse strand
            "+\n",                           # separator line
            "~" * 35 + "\n",                 # one quality character per base
        ]
        fastq.writelines(record)



In [6]:

    
#Aligning reads created above.
#We can also align these as paired-end reads because the "minus" reads come from the reverse strand at the far end of
# the same 700-base 'sequence', so each plus/minus pair looks like the two mates of a single fragment.

#Aligning short sequence "plus" file created above with wild type.
!bowtie2 "/home/pphaneuf/sequencing/NC_000913_2/NC_000913_2" test_SE_plus.fastq -S test_SE_plus.sam

#Aligning short sequence "minus" file created above with wild type.
!bowtie2 "/home/pphaneuf/sequencing/NC_000913_2/NC_000913_2" test_SE_minus.fastq -S test_SE_minus.sam

#Aligning paired-end reads "plus" and "minus" files created above with wild type.
# The quoted path is the basename of the index for the reference genome (the "<bt2-idx>" argument in the bowtie2
# documentation, normally given with -x but passed positionally here), which is in our case NC_000913_2.
#
# -X 1000 is a different option (--maxins): the maximum fragment length allowed for a valid paired-end alignment.
#
# Paired-end sequencing enables both ends of the DNA fragment to be sequenced. Because the distance between each paired read
# is known, alignment algorithms can use this information to map the reads over repetitive regions more precisely.
# This results in much better alignment of the reads, especially across difficult-to-sequence, repetitive regions of the
# genome.
!bowtie2 -X 1000 "/home/pphaneuf/sequencing/NC_000913_2/NC_000913_2" -1 test_SE_plus.fastq -2 test_SE_minus.fastq -S test_PE.sam

#Aligning paired-end reads "plus" and "minus" files in the reverse order from the previous above command with wild type.
!bowtie2 -X 1000 "/home/pphaneuf/sequencing/NC_000913_2/NC_000913_2" -2 test_SE_plus.fastq -1 test_SE_minus.fastq -S test_PE_rev.sam









    



10 reads; of these:
  10 (100.00%) were unpaired; of these:
    0 (0.00%) aligned 0 times
    10 (100.00%) aligned exactly 1 time
    0 (0.00%) aligned >1 times
100.00% overall alignment rate
10 reads; of these:
  10 (100.00%) were unpaired; of these:
    0 (0.00%) aligned 0 times
    10 (100.00%) aligned exactly 1 time
    0 (0.00%) aligned >1 times
100.00% overall alignment rate
10 reads; of these:
  10 (100.00%) were paired; of these:
    0 (0.00%) aligned concordantly 0 times
    10 (100.00%) aligned concordantly exactly 1 time
    0 (0.00%) aligned concordantly >1 times
    ----
    0 pairs aligned concordantly 0 times; of these:
      0 (0.00%) aligned discordantly 1 time
    ----
    0 pairs aligned 0 times concordantly or discordantly; of these:
      0 mates make up the pairs; of these:
        0 (0.00%) aligned 0 times
        0 (0.00%) aligned exactly 1 time
        0 (0.00%) aligned >1 times
100.00% overall alignment rate
10 reads; of these:
  10 (100.00%) were paired; of these:
    0 (0.00%) aligned concordantly 0 times
    10 (100.00%) aligned concordantly exactly 1 time
    0 (0.00%) aligned concordantly >1 times
    ----
    0 pairs aligned concordantly 0 times; of these:
      0 (0.00%) aligned discordantly 1 time
    ----
    0 pairs aligned 0 times concordantly or discordantly; of these:
      0 mates make up the pairs; of these:
        0 (0.00%) aligned 0 times
        0 (0.00%) aligned exactly 1 time
        0 (0.00%) aligned >1 times
100.00% overall alignment rate



In [7]:

    
# Convert each SAM alignment produced by bowtie2 into BAM
# (samtools view: -b writes BAM output, -S declares the input to be SAM).
!samtools view -bS test_SE_plus.sam  > test_SE_plus.bam
!samtools view -bS test_SE_minus.sam  > test_SE_minus.bam
!samtools view -bS test_PE.sam  > test_PE.bam
!samtools view -bS test_PE_rev.sam  > test_PE_rev.bam









    



[samopen] SAM header is present: 1 sequences.
[samopen] SAM header is present: 1 sequences.
[samopen] SAM header is present: 1 sequences.
[samopen] SAM header is present: 1 sequences.



In [8]:

    
# Sort each BAM file. This is the legacy `samtools sort <in.bam> <out.prefix>` form,
# where the second argument is an output prefix rather than a file name.
# NOTE(review): the makegff calls further down read the unsorted test_*.bam files,
# not these *_sorted outputs -- confirm this is intended.
!samtools sort test_SE_plus.bam test_SE_plus_sorted
!samtools sort test_SE_minus.bam test_SE_minus_sorted
!samtools sort test_PE.bam test_PE_sorted
!samtools sort test_PE_rev.bam test_PE_rev_sorted



In [9]:

    
from sequtil import makegff



In [10]:

    
# For every test alignment, write two GFF files from <stem>.bam:
# <stem>.gff with the default settings and <stem>_5.gff with five_prime=True.
for stem in ("test_SE_plus", "test_SE_minus", "test_PE", "test_PE_rev"):
    bam_path = stem + ".bam"
    makegff.write_samfile_to_gff(bam_path, stem + ".gff")
    makegff.write_samfile_to_gff(bam_path, stem + "_5.gff", five_prime=True)









    



/home/pphaneuf/sequtil/sequtil/makegff.py:99: UserWarning: 5' only data should not have been processed as Paired-end.
  warn("5' only data should not have been processed as Paired-end.")



In [11]:

    
#Values starting @ 21 are index of start of reads (reads created started at 20 and ended at 55).
#Values starting @ 1 are the number of reads (depth?) for this position.
#'+' indicates forward strand.

#Test: Test the start index of the read, the score and the direction of the read for single read.

!cat test_SE_plus.gff









    



gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	21	21	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	22	22	2	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	23	23	3	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	24	24	4	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	25	25	5	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	26	26	6	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	27	27	7	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	28	28	8	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	29	29	9	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	30	30	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	31	31	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	32	32	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	33	33	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	34	34	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	35	35	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	36	36	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	37	37	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	38	38	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	39	39	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	40	40	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	41	41	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	42	42	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	43	43	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	44	44	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	45	45	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	46	46	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	47	47	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	48	48	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	49	49	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	50	50	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	51	51	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	52	52	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	53	53	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	54	54	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	55	55	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	56	56	9	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	57	57	8	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	58	58	7	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	59	59	6	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	60	60	5	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	61	61	4	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	62	62	3	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	63	63	2	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	64	64	1	+	.	.



In [2]:

    
!cat test_SE_plus_5.gff









    



gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	21	21	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	22	22	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	23	23	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	24	24	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	25	25	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	26	26	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	27	27	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	28	28	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	29	29	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_SE_plus.bam	30	30	1	+	.	.



In [12]:

    
#Values starting @ 637 are index of start of reads (created from forward reads started at 20 and ended at 55).
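#Values starting @ -1 are the number of reads (depth?) for this position. The negative sign presumably marks
# minus-strand coverage (it matches the '-' in the strand column) -- TODO confirm in makegff.
#'-' indicates reverse strand.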

#Test: Test the start index of the read, the score and the direction of the read for single read.

!cat test_SE_minus.gff









    



gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	637	637	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	638	638	-2	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	639	639	-3	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	640	640	-4	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	641	641	-5	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	642	642	-6	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	643	643	-7	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	644	644	-8	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	645	645	-9	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	646	646	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	647	647	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	648	648	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	649	649	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	650	650	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	651	651	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	652	652	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	653	653	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	654	654	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	655	655	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	656	656	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	657	657	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	658	658	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	659	659	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	660	660	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	661	661	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	662	662	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	663	663	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	664	664	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	665	665	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	666	666	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	667	667	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	668	668	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	669	669	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	670	670	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	671	671	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	672	672	-9	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	673	673	-8	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	674	674	-7	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	675	675	-6	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	676	676	-5	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	677	677	-4	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	678	678	-3	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	679	679	-2	-	.	.
gi|49175990|ref|NC_000913.2|		test_SE_minus.bam	680	680	-1	-	.	.



In [13]:

    
#Test: Test the start index of the read, the score and the direction of the read for paired-end read.

!cat test_PE.gff









    



gi|49175990|ref|NC_000913.2|		test_PE.bam	21	21	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	22	22	2	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	23	23	3	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	24	24	4	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	25	25	5	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	26	26	6	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	27	27	7	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	28	28	8	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	29	29	9	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	30	30	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	31	31	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	32	32	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	33	33	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	34	34	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	35	35	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	36	36	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	37	37	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	38	38	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	39	39	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	40	40	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	41	41	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	42	42	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	43	43	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	44	44	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	45	45	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	46	46	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	47	47	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	48	48	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	49	49	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	50	50	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	51	51	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	52	52	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	53	53	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	54	54	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	55	55	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	56	56	9	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	57	57	8	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	58	58	7	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	59	59	6	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	60	60	5	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	61	61	4	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	62	62	3	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	63	63	2	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	64	64	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	637	637	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	638	638	2	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	639	639	3	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	640	640	4	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	641	641	5	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	642	642	6	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	643	643	7	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	644	644	8	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	645	645	9	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	646	646	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	647	647	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	648	648	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	649	649	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	650	650	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	651	651	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	652	652	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	653	653	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	654	654	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	655	655	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	656	656	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	657	657	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	658	658	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	659	659	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	660	660	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	661	661	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	662	662	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	663	663	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	664	664	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	665	665	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	666	666	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	667	667	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	668	668	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	669	669	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	670	670	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	671	671	10	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	672	672	9	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	673	673	8	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	674	674	7	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	675	675	6	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	676	676	5	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	677	677	4	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	678	678	3	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	679	679	2	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	680	680	1	+	.	.



In [14]:

    
#Test: Test the start index of the read, the score and the direction of the read for paired-end read.

!cat test_PE_rev.gff









    



gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	21	21	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	22	22	-2	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	23	23	-3	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	24	24	-4	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	25	25	-5	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	26	26	-6	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	27	27	-7	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	28	28	-8	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	29	29	-9	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	30	30	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	31	31	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	32	32	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	33	33	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	34	34	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	35	35	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	36	36	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	37	37	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	38	38	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	39	39	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	40	40	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	41	41	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	42	42	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	43	43	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	44	44	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	45	45	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	46	46	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	47	47	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	48	48	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	49	49	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	50	50	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	51	51	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	52	52	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	53	53	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	54	54	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	55	55	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	56	56	-9	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	57	57	-8	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	58	58	-7	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	59	59	-6	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	60	60	-5	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	61	61	-4	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	62	62	-3	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	63	63	-2	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	64	64	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	637	637	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	638	638	-2	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	639	639	-3	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	640	640	-4	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	641	641	-5	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	642	642	-6	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	643	643	-7	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	644	644	-8	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	645	645	-9	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	646	646	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	647	647	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	648	648	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	649	649	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	650	650	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	651	651	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	652	652	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	653	653	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	654	654	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	655	655	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	656	656	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	657	657	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	658	658	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	659	659	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	660	660	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	661	661	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	662	662	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	663	663	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	664	664	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	665	665	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	666	666	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	667	667	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	668	668	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	669	669	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	670	670	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	671	671	-10	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	672	672	-9	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	673	673	-8	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	674	674	-7	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	675	675	-6	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	676	676	-5	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	677	677	-4	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	678	678	-3	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	679	679	-2	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	680	680	-1	-	.	.



In [15]:

    
!cat test_PE_5.gff









    



gi|49175990|ref|NC_000913.2|		test_PE.bam	21	21	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	22	22	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	23	23	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	24	24	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	25	25	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	26	26	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	27	27	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	28	28	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	29	29	1	+	.	.
gi|49175990|ref|NC_000913.2|		test_PE.bam	30	30	1	+	.	.



In [1]:

    
!cat test_PE_rev_5.gff









    



gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	671	671	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	672	672	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	673	673	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	674	674	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	675	675	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	676	676	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	677	677	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	678	678	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	679	679	-1	-	.	.
gi|49175990|ref|NC_000913.2|		test_PE_rev.bam	680	680	-1	-	.	.



In [40]:



In [ ]: