notebook.community

Edit and run



In [56]:

    
# Load the whole FASTQ file into memory as a list of lines with surrounding
# whitespace (including the trailing newline) removed from each line.
with open('mfd_XR-seq_exp1_trimmed2.fastq') as handle:
    fastq = [raw_line.strip() for raw_line in handle]



In [57]:

    
# Tally how many reads carry "TT" at 0-based positions 7 and 8 (the slice
# [7:9], i.e. the 8th and 9th characters of the sequence line).
# Stepping by 4 from index 1 visits lines 1, 5, 9, ... of `fastq`, which
# assumes the usual four-line FASTQ record layout with the sequence on the
# second line of each record.
TT_count = 0
neg = 0
for i in range(1, len(fastq), 4):
    seq = fastq[i]
    if seq[7:9] == "TT":
        TT_count += 1
    else:
        neg += 1

print(TT_count)
print(neg)
sums = TT_count + neg
# `percent` is a fraction in [0, 1], not a value scaled to 100.
# With no sequence lines at all (empty or truncated input) there is nothing
# to divide by; report NaN instead of raising ZeroDivisionError.
percent = TT_count / sums if sums else float("nan")
print(percent)









    



11151641
392525
0.9659979768135697



In [3]:

    
90658









    Out[3]:





'@M01390:56:000000000-BBP8Y:1:1101:14418:1577 1:N:0:1'



In [11]:

    
# Scratch check of the slice bounds used for the dinucleotide test:
# start at 0-based index 7 and take two characters (indices 7 and 8).
seq = "ABCDSFGHT"
seq[7:7 + 2]









    Out[11]:





'HT'



In [13]:

    
# Rewrite the headers of reads that contain the adapter sequence.
# For every kept read, the (up to) 8 characters immediately following the
# adapter become the new '@' header line; the read line itself and the two
# lines that follow it in the input are copied through unchanged.
ADAPTER = 'GGCTCAGTTCGTATGAGTGCCG'

# `with` guarantees both files are closed even if reading or writing fails
# part-way through (the previous explicit close() calls were skipped on error).
with open('wt_test_sampled.fastq') as infile:
    with open('wt_test_sampled_header.fastq', 'w') as outfile:
        for line in infile:
            # Lines containing '@' are treated as original headers and dropped;
            # lines without the adapter are dropped as well.
            # NOTE(review): '@' is presumably also a legal quality character, so
            # quality lines containing it are skipped by this same test -- confirm
            # that is acceptable for this data.
            if '@' in line or ADAPTER not in line:
                continue

            line = line.strip()
            start = line.find(ADAPTER)
            end = start + len(ADAPTER)
            iden = line[end:end + 8]  # shorter than 8 if the read ends early

            outfile.write('@' + iden + '\n')
            outfile.write(line + '\n')

            # Consume the next two input lines so the loop does not re-examine
            # them; in a well-formed FASTQ record these would be the '+'
            # separator and the quality string. readline() returns '' at EOF,
            # so a truncated final record is copied as far as it exists.
            one = infile.readline()
            two = infile.readline()
            outfile.write(one)
            outfile.write(two)



In [ ]: