In [56]:
# Read the trimmed FASTQ file into a list of lines, each with its leading and
# trailing whitespace (including the newline) removed.
with open('mfd_XR-seq_exp1_trimmed2.fastq') as f:
    fastq = [raw_line.strip() for raw_line in f]
In [57]:
# Count the reads whose characters at 0-based positions 7 and 8 are both "T".
# `fastq` is the list of stripped lines loaded earlier; the loop visits indices
# 1, 5, 9, ... (assumes 4-line FASTQ records, so these are the sequence lines).
TT_count = 0
neg = 0
for i in range(1, len(fastq), 4):
    seq = fastq[i]
    if seq[7:9] == "TT":
        TT_count += 1
    else:
        # Also reached for reads shorter than 9 characters: their slice is
        # shorter than two characters and can never equal "TT".
        neg += 1
print(TT_count)
print(neg)
sums = TT_count + neg
# `percent` is a fraction in [0, 1], not a value out of 100.
# With no reads at all the plain division would raise ZeroDivisionError, so
# report NaN instead of crashing the cell.
percent = TT_count / sums if sums else float("nan")
print(percent)
In [3]:
# NOTE(review): bare integer literal; what it represents is not shown in this notebook -- TODO confirm or remove.
90658
Out[3]:
In [11]:
# Sanity check of the slice used for the TT count: [7:9] selects the 8th and
# 9th characters of the string.
seq = "ABCDSFGHT"
seq[7:9]  # -> 'HT'
Out[11]:
In [13]:
# Re-header the reads that contain the adapter sequence. For every input line
# that contains the adapter (and no '@'), write to the output file:
#   1. a new header: '@' + the characters (up to 8) that follow the adapter,
#   2. the stripped sequence line itself,
#   3. the next two input lines, copied verbatim
#      (presumably the '+' separator and the quality string -- TODO confirm).
ADAPTER = 'GGCTCAGTTCGTATGAGTGCCG'
IDEN_LEN = 8  # number of characters after the adapter used as the new header

# `with` guarantees both files are closed even if the loop raises; the explicit
# close() calls at the end were skipped on any error, leaking the handles and
# possibly leaving the output unflushed.
with open('wt_test_sampled.fastq') as infile, \
        open('wt_test_sampled_header.fastq', 'w') as outfile:
    for line in infile:
        # Skip any line containing '@' and any line without the adapter.
        # NOTE(review): '@' is matched anywhere in the line, not only at the
        # start -- confirm that is intended.
        if '@' in line or ADAPTER not in line:
            continue
        line = line.strip()
        # print(line)
        start = line.find(ADAPTER)
        end = start + len(ADAPTER)
        iden = line[end:end + IDEN_LEN]
        outfile.write('@' + iden + '\n')
        outfile.write(line + '\n')
        # Consume the two lines following the sequence line so the for-loop
        # does not see them again; readline() returns '' at end of file, in
        # which case nothing is written for that line.
        one = infile.readline()
        two = infile.readline()
        outfile.write(one)
        outfile.write(two)
In [ ]: