In [110]:
def _parse_fasta(filename):
    """Read a FASTA file and return a dict mapping record ID to its sequence.

    Header lines start with '>'; the ID is everything after that character
    with trailing whitespace removed. Sequence lines are stripped of trailing
    whitespace and concatenated per ID (a repeated ID extends the same
    record). Blank lines are ignored. Sequence lines that appear before any
    header are collected under the empty-string ID.
    """
    chunks_by_id = {}
    record_id = ''
    with open(filename, 'r') as f:
        for line in f:
            text = line.rstrip()
            if not text:
                # Blank lines carry no bases; skipping them avoids creating
                # empty records that would later divide by zero.
                continue
            if text.startswith('>'):
                record_id = text[1:]
                continue
            # Collect chunks and join once instead of repeated string '+'.
            chunks_by_id.setdefault(record_id, []).append(text)
    return {rid: ''.join(chunks) for rid, chunks in chunks_by_id.items()}


def _gc_percentage(sequence):
    """Return the percentage of 'G' and 'C' characters in *sequence*.

    An empty sequence yields 0.0 rather than raising ZeroDivisionError.
    Only uppercase 'G' and 'C' are counted.
    """
    if not sequence:
        return 0.0
    gc_count = sequence.count('G') + sequence.count('C')
    return (gc_count / len(sequence)) * 100


def readBases(filename):
    """Print the FASTA record with the highest GC content.

    Reads the FASTA file at *filename*, computes the GC percentage of every
    record, and prints three lines: the ID of the record with the highest
    percentage, that percentage, and the full dict of records (each entry
    holds its 'Sequence' and 'Percentage').

    If several records tie for the highest percentage, the first one read
    wins. If no record has a percentage above zero, an empty ID and 0 are
    printed. Returns None.

    Raises:
        OSError: if the file cannot be opened (propagated from open()).
    """
    fastaData = {}
    highest_percentage = 0
    highest_percentage_ID = ''
    for record_id, sequence in _parse_fasta(filename).items():
        percentage = _gc_percentage(sequence)
        fastaData[record_id] = {'Sequence': sequence, 'Percentage': percentage}
        # Strict '<' keeps the earliest record when percentages tie.
        if highest_percentage < percentage:
            highest_percentage = percentage
            highest_percentage_ID = record_id
    print(highest_percentage_ID)
    print(highest_percentage)
    print(fastaData)
In [111]:
readBases('rosalind_gc.txt')
Rosalind_5726
51.3317191283293
{'Rosalind_8952': {'Percentage': 49.40923737916219, 'Sequence': 'TTCGGGTTAGCATTATTTATGGTAGTCAAAGCCGGACCAAAGTGTAGTAAAGCACGGTACGTGCGATCTCTACCAATACACATCTGAATCTTTTATTATGAGTTCCCGATAGCGCTTAAT\nTCGAAATCGGTGGTTCTCGGGTTAGCGAGGGATTCTCATCATATCACGTGCGGAGTTCAC\nCACGAGTCGCGGTCATTAGAAAGGAACGATGGGACGCTATGTAACGGTAAGTTTGTGGCG\nCAACTTTATGCAGCTCTTCCCCGTACCTCCATTATAGAGTAACACTCATGAGAGCAGATG\nGCCTCGGCCCAGTCCTGAATTAACCAGAGTTTTTTAGCGCAATCTGATGAAAGAATGGCC\nCGGCCATGAGACACACGATACTCATCAGGAATGGTCTCTTGTTGGCGCGCTTTCGGCATC\nCTGTTACCGGACGGTGGGTGAACCACTCAATTATCCCATACGGACCACCCAGTATTCAGG\nACTGACACGAACCTTTCTGAGCTTACCGAGAGTTCGTTTAAGATAGTGTGACAATGAAGC\nGGAACGTGCTGGCTAGACAATCACGTGCCTCGAGTGAATAGGCCGCCCCACAGATGCATA\nACGTCGTCTTTGTGGTTATACGCGAAAATCTATTGGCACCGCTAAATGCGTAGCGCCAAC\nGGAGTTGTGAGTCCCCCGAAAGGGGCGGACTGGTTTCACCATGATGCTCCATGAGGATTA\nCCCTGTGCGTCTAGGAAGTTACGCTCCCGTAACACTAATGCTTATAAACTTTCCCCAGGG\nGTCTGCCTACGCAAAAAGCGTACACCGCGAGTCAACTACTCCCGCGTACTTCCCTGGTCC\nTCGACGCGCTGCACGGGTCATAACCTATCCCAGACCCTTTTCTTAAACAGCGAAGGACAC\nTACGCTCGGAAACCAG\n'}, 'Rosalind_8209': {'Percentage': 50.427350427350426, 'Sequence': 'CCCATCTATGATACCCGAAAACGCAGACGGCAAACCTACCTGGGGGGAATGGGATGAGGGACTCGCGGACACCTTGGCCTCTGCGTTAGAATTAGGTAGAAATTACACAAGTTACGGGCT\nGAATCAAATGCGTAACCCTTGTCAAGGCTGAACAGATATTTAGGAGAATCAGAGGTCCCT\nAGATCAGAATCCCCCTACGTAGCGCCCCCTCTTATAATGGGTACTCATGAAGCCGGACTT\nAATACCTCTCCCTAACCATTCAGACAATAGTTTGAGCAGCGATTTTGCGGACTTACATCC\nGTTCCCACCTAAGGACGTAGGCAACACTGAGCTTGGCGGTTCACCTTCCCTACGCCATCG\nGGAAGGGCCCAAATGTTTAGTTCGGGACGCGTCCGCTAACGCTCCTACCTCGATCATGTG\nGCTTGTAAAACGTCCTAAGGCTCCCCTGGACGCCCCCAATGTAATATATTCCTAAACGCT\nTACTCTAGTAAAATCATGTTTTTTGCTTCGGCGTATTGTTAACGTCAGTGACATCACCCC\nGCACCGATTTAGTTCCCAGATCGCGGCGCCCCACAGAAGCCGCGTCTATTCTGAAACCGG\nGGCCGCTGGTACTCGGGTTTCTTCTGACTCCCTTTGTCATCCTTCGCTCTCAGGTAGAAG\nCGGTCGGCACCTGAAACACCTCTTGTGGTATCTATCAATAGTGCGCGGGACTTGGGGGTC\nTCGCCTGTCCACGCAGTGAGGATGACTGAAAACGTAGGCTTGTATTCTGCTGTATGATTC\nTAGCCTGTAATTGTGGATGGGGTTCG\n'}, 'Rosalind_6771': {'Percentage': 46.808510638297875, 'Sequence': 
'TCTCGCACCAGCAAGATAATTGGGTTACATTGGACATAACCGAAATGACGCCTGCATGGTCGTAAATCAGATAAGAGTTGTAGGATATTTGAGCCTCAGGGGGCATCATGTTATCTTGCG\nTACGTGATCACCCTATCTAATGTGTTCTCCGGACCAGGGTGCATCCCACGAAATCAACTC\nGCGCGTCCGGACCACGGCGGTCTTTTCTTCCACCATCCAATTCTATCAAATTTTTATGTT\nGGGAACACGGCCCCGACAGTCTGAGTGCCTTAGCACCGGATGACTTCCAGAGATTTGAAT\nCTACTAATTGATTGTTATATACGAGGAGCGATTGGACAACGTTGATGACAACGCGTCGTA\nACAAAGATATTAATAGCAGCATTTCTAATCAGCATAAGAAAACAATGTCGACACAGCGAT\nCAGGAACTAGATGGGCCACACCTCTGCATGTCAAGTACCGTTTCCATGGCTACAGCCCCT\nCATCCGCAGTGGATGACCCAATAGATACGCACAGGGAACGTTGGTTCCATCGCCTAGTTG\nCTATAGTATTCGCGGCCTTGAGGCGGCTTGTTGACCGTAAAACTATATTTTAAAACAGCA\nGTCCTGAGTAGATTCTTTCATATAGAGCAGGCCCCTTAATGTAAGTCGCTTCCACGTTAA\nAGTCGAGCCTATCATCGTCGTTCGGGTGGGCATTGGATTTGAGTTCCGTCAGGACCGATC\nCATCTCGTTATAGGCATGAATTCCATGGTCTCGCTGGCTCCACCAAAGCGAAATAGGATC\nATCCTGTCGCGGCGGCGCTTGTGACTTGGGGGTAGCTTAACTTGTGAGCCTGAGTTGTTC\nAGTTTATCAGGGCAATTGTCGTAATAGAAGAGCTTTCATCCAGCCAAAGCATATTTAGTT\nGAAGCCGACTGCCGTTCGGAGGACA\n'}, 'Rosalind_4643': {'Percentage': 49.94786235662148, 'Sequence': 
'CACTAGTTCCGGGAGTCTGGGGAATTCAAAAGGACCAAAAAGCAAGGGTCTGTCTACCTCGTAGCACACTACCGGGCCGTTTATATCTTTGTAACGCCCGTAGTAGAGGTGGGTAGGCGT\nCGCCTTATCAAATTCCCAAAGCGCTTTTACGGGGATCGCGCGTCCGGGTCCCTCTGGACT\nGGGGGCATAACTTTCGGTGCTTAAGATTCAGTTTTATCATCCCATGGGCGATTTGCTCGG\nACCCAGGAAGGTCAGAACGCCCTGATGCACGATAGGCCATCGTTCACTCGTTTTTGAGGA\nAGGTCAGCGGATGCCACTCACATTTTCGAGAATCTGAATTACCTTTGGCCCGACTGGAAG\nACTATGTTTACCTGCTAGTCACTCAGTTGTGACTGACGTGAACTTAAAGCATGCCAATGG\nTTCCACTAGCATGCAGTCTCCCTGAGCTTCATACCTTGCCTGAAGCACGGTTCAAAACAT\nTTGCTCTGTCACAGGTGTGTATGGCGTCGTTGCGGTACCTTCGTTTAGGCTCCTTCAGGT\nGGCGAGGGTCGTTGCTAGGATAGCCCCTTGTGGCGATGCAATATTGCTCTCCGTCGCTCG\nAAGCAGACGCTGCGTTTACTATTGCCAACACTCAGGAAACCGACTTGCAGTGGTCTGAGC\nTGATGGCATAGGAAGCCCAAAATACAGAACTGACAACCTGCCAGGATATTGCGTAGCATC\nCATCGGCAAGTGTACACCGGAGAGTTGGCAGTCTCTCTTCAGAGTGGCCGCTGTTGTTCT\nTCATAATAGTCCTCTAGGGTTCTCTCTGACATGCAGTCTATGGGGACGGTTACATGACTT\nGAGGGAAAGAAGAATGATCGAGAGGGTTGGACGACGCAAAATTTTATTGTGGCTGCAAAA\nTAAACTCGCCAAGGGGCCTAGTTATAACACTCGGCGTTCTCGCA\n'}, 'Rosalind_1471': {'Percentage': 49.25201380897584, 'Sequence': 'GATGATTTACATAAACTATCCCTTACGCTCAGCAGGATCGGGCTGGGTTCCTGGCGTTAGTAGCCTGGGGCTTTCGACCTAACCTGGCTGCTACTGCAGTGGGTGGGACACATCTGACGC\nATAGGACTGTTGTAAATTCAGTGGAATCCCGGAGTCTGGTTGAGGCCCGCACAAGGTAAT\nCGAAGAAAGTGAGTGGGGTAACCCTGCCCCAAGCAAAAACTTTGTATTTTACCTCTGCCC\nTCCTGTGCCAGATGGGTTCGGAACCTATACGTCGAAAGCACTACTTTATACTCTTCGCAA\nGATCCGTCGCGAATGCGGAGGTAAGTACAAATTAACTAGTATGTCCGCTCTCCCTGCTGC\nAAGCTTATTGGTCATTCGCTTCACCCATGTGTCGGGTCGATGTCGCAAGAAAAGCATTGA\nGAAGTACCCAAAGAAGTGGCCGTCGAACTGAGACCACCTAATGGTGAACTAGATAGTACT\nTCAACGGCGACAACCAACCTAGGGACGAGCAAAATCGACGTTTTCAGTATTGGATTGTAC\nGAATTTTGCCCATAAACTACGACGACTAGGTCACGGAGGTCTATCAACCACCATGAGTGG\nCCACGGTCCCTTGTTGATCTAAATAGCCCCTCTCACCACGGAATCCTAGGTAATTTGCAC\nGGATTAAACAGCTTAGTTTCTGGTCCCATTGAGCCGAGAGGGTAGGTACACAAAACCCTA\nTGTTTCAATTAAGACGACTCAGTCTTTGTGGGGAGCGGCGGACTTCAACGGTACAGGATT\nCCTTGCGGCAGGGATGTCGGACCACGGTGGCTGCGGGGACGCATTCATTCCACTATCGTC\nACTGAGCTATCTGTC\n'}, 'Rosalind_5726': {'Percentage': 51.3317191283293, 'Sequence': 
'TTACCCCTCGCGTTTAGACATGGAACTCCCACTAGGCGTGGTCTGCGGATGTAGCAACAGAAATGAACGCTTTAGTTGCCATCTGTCTTTAACACACCAAGAAGATAAGCATGATTGTTA\nCCAAACGAGCTTGGCTCTTCTTCATTTCATGATCTCCCTATTCCCTTATACCTCCCTGGG\nGACTCCACTTAGTCTGACTTGCGGCTCGAAAGAAAGGCGCGTTCCTGATTTTCCGGCACT\nCACAGAGCGATACTAACGGGTGAACCGGCGATGCAGAATTATGTGCCAAGCCAAACAGAG\nGTAGCGCCGACATTCCTCACGTTTTCACGTCTACGCATGTCCTGGAGACGTCGCCATTCC\nGTTGTATCCGGTACACAACCGCAACGCATGCCCAACCCATGAATCCGTCTGACGCTTGTT\nGTAAATGTCTGAACAGGGCGGCCTAGTTCACCGGGGAAGCTGCCCACATTGTTGAGAACT\nTAGATCTGCCCCAAGATCCCGACAGCGCTAGTTGCAGAAGCACTGGTCATCGTACACTAA\nCGTTTAGGTGGTGGCGCGTGGGCGACGTCCCGCCAACGAATACAAAGGAAATAGATAACC\nCAGCTACCAGGGCCTCTGAAATAAGGAACATCATTTATTGCAAACTGAGCAACCGCCAGC\nGCGTCGGCATAGTTTCGAGGAGCTCTTGCACTCTCTTTCCCCCTTCGCCCTGCAATGGCG\nTATAAGTGATGGAGGAGTCGCTCCGTTGCAACATGAAGGCTAGGTGGTCCCAAGATTGCT\nGGGCAGGCGAGGAGCCACCGAACACCAAGGTAA\n'}, 'Rosalind_4438': {'Percentage': 47.18004338394794, 'Sequence': 'TTGGAATGACCAGGACTCTGAGTATGCCTAGATGTTTTAGGTGATTCGTAGGGCGACCAGGGAAAGGGCCGTATATTGCAGCCATCCGTGCAGTAATTAGGTTCCCGCCCATGAATTTGT\nTGTGGCGCGACCATCGCGGAATAGATCCGATAGCCCCTCCGACGGATAAAACCGATGTTC\nTACTATTGTAGCTAGCAAGCATCCGTCACCTTTTCACTCACCTGCTACCGGCGTGCGCAC\nTTATGAATGACGAACACGGGATCAATAACGAACCGAATGGAGACGTGCGTGTTGTTTGAC\nCAGATTATTAGCCTGTACCTAATGATGACCATAAGTTTTGATTGTACCCTACATACTCAG\nATCACAATCCAGACCATACTGTCAGAGGGGACGGTAGGGCTAAATCGTGAGATTTTTGAA\nAACAGAGGGATTCAAAGGCCGATTGCTAGCAGCGCCACGACACAATGCAGATTCCGTTGG\nTGACGCTGTGATCAATCTATAAGCTTAGCGTGCACTGGCTTAGCGAACTTTCTTATTGCC\nACGCCATACCCTTGCGTACCGAACAAGCAGAAATGACAAACTGGCGACTCTGATAGGATA\nACGACGCGGATGTACTACGTTAAACTTTGATTTCATGAAACTAGTAACTACGACAATAGT\nGCCGTTGTGTCCAGATGCGAAGCAGAGACAAAGACTAGTGAGCACATTTCCGAACCGACC\nAACTGATCATCGTACAGGCGGTCTAAGGCTAACCGGGACGAGGCCTCTCAGTAGGATCTT\nTTTCACGTTCGACTCTAAAGGGGGATTCCTCGGCCCAAAAGACCCAATTCGAAATACTGC\nGGACCATTGTACGTGTGGCTTCCTGCAACATATTGATTAGTTCTCATTCTTGCCGATACC\nCTCTCGT\n'}, 'Rosalind_2538': {'Percentage': 47.923681257014586, 'Sequence': 
'TAATACGCTCCGGTCGCGCTGTATTCCAGCAACCACGTCATTGGAAACATTCTCACAGGAACAATGATCGAACGCAAGATCTGAATCACGTGATTCCGCACCGTAGTCTAAGGATTAGTT\nCACTTTTACCCTCCGAGGAATTGGGCGAGTTTCGCGATCTAATCAATGGTCTGAATGTGC\nCGCCACCACAAGCGATTGCTCAAGTAATTGATCAACGATATCGGTTTGCTCGTAAAGCTC\nCCAAGAGGAGCTGATTCGTAGGGATGGCGTTGTCATAACCGGGCCAACCTCAAAGGACAA\nACCAACAGTGTAAAGTGGGAGCTAGGTTAAATCGATAACACGAATATAGGTAAGGTACCG\nTGATGTAATATTAAGATCCTTCGAGGCTCAACTCAATATGTTTGAAACGGCATGGGCAGA\nGACTTATTCTGTGAATCGACCCATTCAAGCAGGAACTCGCGTAGTGGCTGGAGGAACTCT\nCTCATATCATCTTATTGAGATTGGCCTGGATTAGGCAATTTCTCCTCCTGTCGTGCCCGG\nAGCTTACCACTTCTTCCGGGCACTACGTGAACTGAGCCAAGTATTAACGAGCGGTGGGTA\nGCCTGACAAGATATGTGGGTAAAGGCCCTTCGGGACCTTCTGCTCCGTGGTCACTCCAGC\nGCTAATTGTTTAGCGAAATGAGAGTCTGGTAGGTTGGCGCCAATTACGAGATAGCTAGAC\nACGAACCCGCCATCCCCTTGCGTACGAGGAAACACGCGTATCTCGTGGGGAACCGTATAC\nCGAATTAACTTGCTGGTTCTCCTTTTTAGTGACGGGTGCACTTTTGTTCGTCTCTTTATC\nGTCCGGGCTCGACGGCTGCTCACTGAATCTTTCTAGT\n'}, 'Rosalind_0518': {'Percentage': 50.14807502467917, 'Sequence': 'CAATTTAGGCAGAGATAGTCACACGACTTATCAGATAGAGCACGCGCGATCGCGAGCTTAAAAGTCGGTACTTTATGATCCATATTGGGGACTTGCGTATATTTAGTGGTCTTGCGTTAC\nGCCAAGACAGACCTTACAGCTCCACACCAATTAATGTCACTACCAGTCGGGAGGCACGAA\nAGGCTGGGAGCGAGAAGTAGTTGAGGCCAGAGGACCTTCTTACGGTCCTCTGGGCCAACA\nATGCATAGTAGACCAGACAGAAGTTGGTTTTGTCCAGATTCCCTCCCGTTTGTACTTGCG\nGGCTATACAGTTATGCGGTAGAGGACCGCACTATGGGGCGACAGAAGTTGACTTACGTAC\nAGAGTCCACATTTAATTCACGGTCGATCAGAATTGTATAAAGGCCGCTAAGGCGGACAGC\nTGGATAAACTCAAGTCTGCCTAGAAAACTCCACGTTGCGGACGTGCCAGCGTGCTCCCAC\nGCAAACAACGCTCCAAAGTAGCGATCTCGGGCGATGGTATGGCGAGATTAGGCGCGGCTT\nTGTTGTGGCCTTGACACTAGCTTCGACCAAGTCGCTCCTCCCAACGGAGGTAACAACAAC\nCGTAACCGACTAGACACATTTCAGTTGCTAGTCTAATTCGTATGGTTCTGAGAACTATAG\nCTCAGCAGGGCAAACGGCATAGGTGTTACGGGCATAGAAACTCAGAAACAGTGGATTGCG\nGGAGCTGGAAGTGTTTAGCCTTTACGATCCCTTATTCGCAACCGCACAAGTCGAGGACGA\nGTGTGATACCCTACGTAGTGGAGTTCTTGGCTGGCCCGCGAAATTGTGCTTACACAGACC\nTGAAGACCGTGGTGGTAGAGATGGTTGCATGGTGCCCGGTGTTGGCACAATTGTATCGTT\nTGGCGTCCCACCCTCCCCGTACGAGTTTGGTGGAGTTATTATGCCCGGTTGTAGGAAACG\nAAGTCGTGTCACGAGATCGATGGACCCTTCGAATCC
G\n'}}
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Content source: thirdknife/rosalind-problems
Similar notebooks: