Python for Bioinformatics

This Jupyter notebook is intended to be used alongside the book Python for Bioinformatics.

Chapter 4: Programming: Flow Control

Listing 4.1: ifelse1.py: Basic if-else sample


In [ ]:
# Basic if/else: read a height in meters and branch on a single comparison.
# float() raises ValueError if the text typed is not a number.
height = float(input('What is height? (in meters): '))
# Strict comparison: exactly 1.40 falls into the else branch.
if height > 1.40:
    print('You can get in')
else:
    print('This ride is not for you')

Listing 4.2: ifelse2.py: if-else in action


In [1]:
# Translate a one-letter amino acid code into its three-letter code.
# Only the four residues listed here are known to the table.
three_letter_code = {'A':'Ala','N':'Asn','D':'Asp','C':'Cys'}
aa = input('Enter one letter: ')
# Membership test on the dict keys; the lookup is case-sensitive.
if aa in three_letter_code:
    print('The three letter code for {0} is {1}'.format(aa,
          three_letter_code[aa]))
else:
    print("Sorry, I don't have it in my dictionary")


Enter one letter: A
The three letter code for A is Ala

Listing 4.3: elif1.py: Using elif


In [4]:
# Classify a primer by length using an if / elif / else chain.
dna = input('Enter your primer sequence: ')
seqsize = len(dna)
if seqsize < 10:
    print('The primer must have at least ten nucleotides')
# Reached only when seqsize >= 10, so this branch covers 10..24.
elif seqsize < 25:
    print('This size is OK')
else:
    print('The primer is too long')


Enter your primer sequence: ACGTAGCTCGACATCAGACTACGACTCGCATCGCATCAGCCTCGCATCGCGA
The primer is too long

In [4]:
# An int and a str never compare equal, even when they look alike.
bool(1 == '1')


Out[4]:
False

Listing 4.4: nested.py: Nested if


In [ ]:
# Nested if: a second test placed inside the first branch.
# input() is the Python 3 replacement for Python 2's raw_input().
dna = input('Enter your DNA sequence: ')
seqsize = len(dna)
if seqsize < 10:
    print('Your primer must have at least ten nucleotides')
    # Inner check: an empty entry gets this extra message in addition
    # to the one printed just above.
    if seqsize == 0:
        print('You must enter something!')
elif seqsize < 25:
    print('This size is OK')
else:
    print('Your primer is too long')

In [15]:
# Bind the name, then leave it as the last expression so the cell echoes it.
answer = 42
answer


Out[15]:
42

In [16]:
# `==` compares values (it does not assign); the result is a bool.
answer == 3


Out[16]:
False

In [17]:
# Equality test against the value currently bound to `answer`.
answer == 42


Out[17]:
True

Listing 4.5: elif2.py: Nested if


In [ ]:
# Flat if / elif chain: exactly one message is printed per input.
# input() is the Python 3 replacement for Python 2's raw_input().
dna = input('Enter your DNA sequence: ')
seqsize = len(dna)
if seqsize == 0:
    print('You must enter something!')
# Chained comparison: true for lengths 1..9.
elif 0 < seqsize < 10:
    print('Your primer must have at least ten nucleotides')
elif seqsize < 25:
    print('This size is OK')
else:
    print('Your primer is too long')

Listing 4.6: multiplepart.py: Multiple part condition


In [22]:
x = 'N/A'
# `and` short-circuits: float(x) is evaluated only when x is not the
# 'N/A' placeholder, so no conversion error can occur here.
in_range = x != 'N/A' and 5 < float(x) < 20
if in_range:
    print('OK')
else:
    print('Not OK')


Not OK

Listing 4.7: multiplepart2.py: Multiple part condition, inverted


In [ ]:
# Same test as Listing 4.6 with the operands of `and` swapped.
# float(x) is now evaluated first, so with x = 'N/A' this cell raises
# ValueError before the x != 'N/A' guard is ever reached.
x = 'N/A'
if 5 < float(x) < 20 and x != 'N/A':
    print('OK')
else:
    print('Not OK')

In [23]:
total = 5
items = 2
# Conditional expression: the division is only evaluated when items != 0.
average = total/items if items != 0 else "N/A"
print("Average = {0}".format(average))


Average = 2.5

In [24]:
total = 5
items = 2
# Statement form of the same decision: handle the zero-items case first.
if items == 0:
    print("Average = N/A")
else:
    print("Average = {0}".format(total/items))


Average = 2.5

In [25]:
bases = ["C", "T", "G", "A"]
# A for loop visits the elements in list order.
for base in bases:
    print(base)


C
T
G
A

In [26]:
bases = ["C", "T", "G", "A"]
# enumerate() pairs every element with its zero-based position.
for position, base in enumerate(bases):
    print(position, base)


0 C
1 T
2 G
3 A

In [27]:
# range(0, 4) yields 0, 1, 2, 3; the stop value itself is excluded.
for x in range(0, 4):
    print(x)


0
1
2
3

In [ ]:
# Looping over an explicit list of integers.
numbers = [0, 1, 2, 3, 4]
for n in numbers:
    print(n)

Listing 4.8: protwfor.py: Using for to figure the weight of a protein


In [28]:
# Add up per-residue weights for a protein sequence typed by the user.
prot_seq = input("Enter your protein sequence: ")
prot_weight = {"A":89, "V":117, "L":131, "I":131, "P":115,
               "F":165, "W":204, "M":149, "G":75, "S":105,
               "C":121, "T":119, "Y":181, "N":132, "Q":146,
               "D":133, "E":147, "K":146, "R":174, "H":155}
# Letters are upper-cased one by one; anything missing from the table
# contributes 0.
total_weight = sum(prot_weight.get(aa.upper(), 0) for aa in prot_seq)
# 18 is subtracted once per link between consecutive residues
# (presumably the water released by each peptide bond). A sequence of n
# residues has n - 1 links; clamp at 0 so an empty entry reports 0
# instead of +18.
links = max(len(prot_seq) - 1, 0)
total_weight = total_weight - (18 * links)
print("The net weight is: {0}".format(total_weight))


Enter your protein sequence: AFTGTGATCGTMATGHQ
The net weight is: 1610

In [29]:
a = 10
# The condition is re-checked before every pass; the loop ends once a hits 40.
while a < 40:
    print(a)
    a = a + 10


10
20
30

In [53]:
a = 10
# Endless loop with an explicit exit test at the top of the body.
while True:
    if a >= 40:
        break
    print(a)
    a += 10


10
20
30

Listing 4.9: seachinlist.py: Searching a value in a list of tuples


In [40]:
color_code = [('red', 1), ('green', 2), ('blue', 3), ('black', 4)]
name = 'blue'
# Every (color, number) pair is inspected; there is no early exit, so the
# scan continues to the end of the list even after a match.
for color, number in color_code:
    if color == name:
        code = number
print(code)


3

Listing 4.10: seachinlist2.py: Searching a value in a list of tuples


In [41]:
color_code = [('red',1), ('green',2), ('blue',3), ('black',4)]
name = 'blue'
# Same search, but stop at the first pair whose color matches.
for color, number in color_code:
    if color != name:
        continue
    code = number
    break
print(code)


3

Listing 4.11: seachinlist3.py: Searching a value in a list of tuples


In [42]:
color_code = [('red',1), ('green',2), ('blue',3), ('black',4)]
name = 'blue'
# Advance an index until the pair at that position carries the wanted name.
i = 0
while color_code[i][0] != name:
    i = i + 1
code = color_code[i][1]
print(code)


3

Listing 4.12: seachindict.py: Searching a value in a list of tuples using a dictionary


In [43]:
color_code = [('red',1), ('green',2), ('blue',3), ('black',4)]
name = 'blue'
# Turn the pairs into a name -> number table; the search is then one key lookup.
color_code_d = {color: number for color, number in color_code}
print(color_code_d[name])


3

Listing 4.13: protnetcharge.py: Net charge of a protein


In [44]:
prot_seq = input("Enter protein sequence: ").upper()
# Running total; it starts from -0.002 rather than from zero.
charge = -0.002
aa_charge = {"C":-.045,"D":-.999,"E":-.998,"H":.091,
             "K":1,"R":1,"Y":-.001}
for residue in prot_seq:
    # Residues without an entry in the table leave the total untouched.
    if residue not in aa_charge:
        continue
    charge += aa_charge[residue]
print(charge)


Enter protein sequence: EDCHYRYHEH
-1.7710000000000001

Listing 4.14: protnetcharge2.py: Net charge of a protein using get


In [3]:
prot_seq = input('Enter protein sequence: ').upper()
# Running total; it starts from -0.002 rather than from zero.
charge = -0.002
aa_charge = {'C':-.045,'D':-.999,'E':-.998,'H':.091,
             'K':1,'R':1,'Y':-.001}
for residue in prot_seq:
    # dict.get() supplies 0 for residues missing from the table, which
    # removes the need for an explicit membership test.
    charge = charge + aa_charge.get(residue, 0)
print(charge)


Enter protein sequence: AWERRDRTKKRFDHYU
3.0919999999999996

Listing 4.15: lowdeg.py: Search for a low degeneration zone


In [47]:
# Find the 15-residue stretch with the lowest summed degeneracy value.
prot_seq = input('Protein sequence: ').upper()
prot_deg = {'A':4, 'C':2, 'D':2, 'E':2, 'F':2, 'G':4,
             'H':2, 'I':3, 'K':2, 'L':6, 'M':1, 'N':2,
             'P':4, 'Q':2, 'R':6, 'S':6, 'T':4, 'V':4,
             'W':1, 'Y':2}
window = 15
segs_values = []
# Visit only the start positions that leave room for a full window; the
# range is empty when the sequence is shorter than the window.
for start in range(len(prot_seq) - window + 1):
    segment = prot_seq[start:start + window]
    degen = 0
    for x in segment:
        # Letters missing from the table count as 3.05, the mean of the
        # 20 table values.
        degen += prot_deg.get(x, 3.05)
    segs_values.append(degen)
if segs_values:
    # list.index() returns the first occurrence, so ties go to the
    # left-most window.
    min_value = min(segs_values)
    minpos = segs_values.index(min_value)
    print(prot_seq[minpos:minpos + window])
else:
    # min() would raise ValueError on an empty list; report the cause instead.
    print('The sequence must have at least {0} amino acids'.format(window))


Protein sequence: ACITSWYQEELTVSTHRRRKY
CITSWYQEELTVSTH

Listing 4.16: lowdeg2.py: Searching for a low-degeneration zone; version with while


In [48]:
# Same search as Listing 4.15, driven by a while loop over a sliding window.
prot_seq = input('Protein sequence: ').upper()
prot_deg = {'A':4, 'C':2, 'D':2, 'E':2, 'F':2, 'G':4,
            'H':2, 'I':3, 'K':2, 'L':6, 'M':1, 'N':2,
            'P':4, 'Q':2, 'R':6, 'S':6, 'T':4, 'V':4,
            'W':1, 'Y':2}
segs_values = []
segs_seqs = []
segment = prot_seq[:15]
a = 0
# Near the end of the sequence the slice gets shorter than 15, which
# terminates the loop.
while len(segment) == 15:
    degen = 0
    for x in segment:
        # Letters missing from the table count as 3.05, the mean of the
        # 20 table values.
        degen += prot_deg.get(x, 3.05)
    segs_values.append(degen)
    segs_seqs.append(segment)
    a += 1
    segment = prot_seq[a:a+15]
if segs_values:
    # list.index() returns the first occurrence, so ties go to the
    # left-most window.
    print(segs_seqs[segs_values.index(min(segs_values))])
else:
    # min() would raise ValueError on an empty list; report the cause instead.
    print('The sequence must have at least 15 amino acids')


Protein sequence: EKQEKQTVWACAMMMFTVYHHT
EKQEKQTVWACAMMM

Listing 4.17: lowdeg3.py: Searching for a low-degeneration zone without sub-chains


In [2]:
prot_deg = {'A':4, 'C':2, 'D':2, 'E':2, 'F':2, 'G':4,
            'H':2, 'I':3, 'K':2, 'L':6, 'M':1, 'N':2,
            'P':4, 'Q':2, 'R':6, 'S':6, 'T':4, 'V':4,
            'W':1, 'Y':2}


def lowest_degeneracy_segment(prot_seq, window=15):
    """Return the `window`-long stretch of prot_seq with the lowest degeneracy sum.

    Only the best score and its segment are kept while scanning; no list of
    per-window scores is built.

    Args:
        prot_seq: Upper-case one-letter protein sequence.
        window: Number of residues per candidate segment.

    Returns:
        The selected segment, or None when prot_seq is shorter than `window`.
        When several windows share the lowest score, the right-most one is
        returned (the comparison below is `<=`).
    """
    best_seq = None
    degen_tmp = float('inf')
    # The "+ 1" makes the window that ends on the last residue a candidate
    # too; without it the final start position is never scored.
    for n in range(len(prot_seq) - window + 1):
        degen = 0
        for x in prot_seq[n:n + window]:
            # Letters missing from the table count as 3.05, the mean of the
            # 20 table values.
            degen += prot_deg.get(x, 3.05)
        if degen <= degen_tmp:
            degen_tmp = degen
            best_seq = prot_seq[n:n + window]
    return best_seq


# True when the cell is run in the notebook or as a script; skipped when the
# code is imported, so the function can be exercised without a prompt.
if __name__ == '__main__':
    prot_seq = input('Protein sequence: ').upper()
    seq = lowest_degeneracy_segment(prot_seq)
    if seq is None:
        print('The sequence must have at least 15 amino acids')
    else:
        print(seq)


Protein sequence: EKQEKQTVWACAMMMFTVYHHT
KQEKQTVWACAMMMF