In [56]:
# Load every line of the FASTQ file into a list, with surrounding whitespace
# (including the trailing newline) removed from each line.
# Iterating the handle directly builds only the stripped list; calling
# readlines() first would keep a second, unstripped copy of the whole file
# alive until the comprehension finished.
with open('mfd_XR-seq_exp1_trimmed2.fastq') as f:
    fastq = [x.strip() for x in f]

In [57]:
# Count how many reads carry "TT" at 0-based positions 7-8 (slice 7:9) and
# report that count, the remainder, and the TT fraction.
# Assumes the standard four-line FASTQ record layout, in which the sequence is
# the second line of each record -- hence the walk over indices 1, 5, 9, ...
DIMER_START = 7
DIMER_END = 9

TT_count = 0
neg = 0
for i in range(1, len(fastq), 4):
    seq = fastq[i]
    # Reads shorter than DIMER_END yield a truncated slice and count as `neg`.
    if seq[DIMER_START:DIMER_END] == "TT":
        TT_count += 1
    else:
        neg += 1

print(TT_count)
print(neg)
sums = TT_count + neg
# With no reads at all the fraction is undefined; report 0.0 instead of
# raising ZeroDivisionError.
percent = TT_count / sums if sums else 0.0
print(percent)


11151641
392525
0.9659979768135697

In [3]:
90658


Out[3]:
'@M01390:56:000000000-BBP8Y:1:1101:14418:1577 1:N:0:1'

In [11]:
# Scratch check of slice semantics: on a 9-character string, [7:9] selects
# the characters at 0-based indices 7 and 8 (the last two).
seq = "ABCDSFGHT"
seq[7:9]


Out[11]:
'HT'

In [13]:
# Copy the reads that contain the adapter into a new FASTQ file, replacing each
# record's header with '@' followed by the ID_LENGTH characters found
# immediately after the adapter in the read.
ADAPTER = 'GGCTCAGTTCGTATGAGTGCCG'
ID_LENGTH = 8

# `with` guarantees both handles are closed even if the loop raises.
with open('wt_test_sampled.fastq') as infile, \
        open('wt_test_sampled_header.fastq', 'w') as outfile:
    for line in infile:
        # Any line containing '@' is skipped, so the original headers are
        # never copied. Lines without the adapter are ignored as well.
        # NOTE(review): a quality line containing '@' is skipped by the same
        # test; confirm that is acceptable.
        if '@' in line or ADAPTER not in line:
            continue

        line = line.strip()
        start = line.find(ADAPTER)
        end = start + len(ADAPTER)
        # May be shorter than ID_LENGTH (or empty) when the adapter sits near
        # the end of the read.
        iden = line[end:end + ID_LENGTH]
        outfile.write('@' + iden + '\n')
        outfile.write(line + '\n')

        # The two lines after the read are copied verbatim -- presumably the
        # '+' separator and the quality string; verify against the input.
        # next(..., '') yields '' at end of file, matching readline().
        separator = next(infile, '')
        quality = next(infile, '')
        outfile.write(separator)
        outfile.write(quality)

In [ ]: