Quick recap

  • code blocks and conditional execution

In [ ]:
# code blocks: the indented lines under `if` / `else` form the block that
# is executed for that branch; each header line must end with a colon
condition = True  # placeholder so the template runs; any boolean expression works
if condition:
    pass  # do this
else:
    pass  # do that

In [ ]:
# conditional execution: exactly one of the two branches runs
my_value = 12
if my_value <= 5:
    print('value lower than 5:', my_value)
else:
    print('value bigger than 5:', my_value)

Session 2.2

  • for and while loops

In [ ]:
# for loops: the loop variable `v` is bound to each element of the list in
# turn, and the indented body runs once per element
my_values = [1, 3, 5, 7, 3, 9]
for v in my_values:
    print(v)

In [ ]:
# combine with previous condition: the if/else block is nested inside the
# loop body, so it is evaluated once for every value
for v in my_values:
    if v > 5:
        print(v, 'is > 5')
    else:
        print(v, 'is <= 5')

In [ ]:
# display the built-in help text for range
help(range)

In [ ]:
# range(1, 10) stops before 10, so unpacking it into a list gives 1..9
my_numbers = [*range(1, 10)]
print(my_numbers)

# the third argument is the step: 1, 3, 5, 7, 9
for i in range(1, 10, 2):
    print(i)

In [ ]:
# example with string: looping over a string yields one character at a time
prot_seq = 'SYLYC'
for s in prot_seq:
    print(s)

In [ ]:
# example with dictionary: looping over a dictionary visits its keys,
# which are then used to look up the matching value
rna_dict = {
    "G": 345.21,
    "C": 305.18,
    "A": 329.21,
    "U": 302.16,
}
for r in rna_dict.keys():
    print(r, rna_dict[r])

In [ ]:
# a loop variable stays defined after its loop ends: when the cells are run
# in order, `r` still holds the last key visited by the previous cell's loop
print(r)
# iterate over the values only; this rebinds `r` to each value in turn
for r in rna_dict.values():
    print(r)

Exercise 2.2.1

  1. Create a 15 base long DNA sequence.
  2. Print its length.
  3. Use a for loop to print each individual base of the sequence on a new line.

While loop

There is a fundamental difference between the for and while loops:

  • with a for loop, you need to know beforehand how often the loop body will be executed
  • with a while loop, the loop body is repeated for as long as a given condition is true (i.e. until it becomes false)

In [ ]:
# while loops: the condition is re-checked before every pass through the body
my_value = 0.25
while my_value < 8:
    my_value = my_value + 1
    print(my_value)
print('my final value is', my_value)

Exercise 2.2.2

  1. Reuse the 15 base long sequence created.
  2. Create a while loop that starts at the third base in the sequence and outputs every third base until the 12th.

In [ ]:
# solution with while loop
dna = 'TCATCGTCTTCCTCA'
i = 2            # index 2 is the third base (indexing starts at 0)
while i <= 11:   # index 11 is the 12th base
    print(dna[i])
    i = i + 3

In [ ]:
# solution with slicing and step: [start:stop:step] selects indices 2, 5, 8, 11
[*dna[2:12:3]]

In [ ]:
# solution with range: the generated indices are 2, 5, 8 and 11
for i in range(2, 12, 3):
    base = dna[i]
    print(base)

In [ ]:
# regular expression for checking all occurrences within a string
import re

my_text = 'Check occurences within this string'
pattern = 'e'
for m in re.finditer(pattern, my_text):
    # span() is the (start, end) pair of the current match
    print(*m.span())

In [ ]:
# display the built-in help text for re.finditer
help(re.finditer)

In [ ]:
# skip an iteration in loops: `continue` jumps straight to the next element,
# so negative values are never added to the running total
values = [10, -5, 3, -1, 7]
total = 0
for v in values:
    if v < 0:
        continue
    total = total + v
print(total)

In [ ]:
# break the loop completely
sequence = ['CAG', 'CAA', 'TAC', 'CAA']
for codon in sequence:
    if codon == 'TAC':
        # leave the loop here; the remaining codons are never visited
        break
    print(codon)

In [ ]:
# do not delete items from a collection while looping over it

In [ ]:
# range: the `?` prefix is IPython/Jupyter help syntax (not plain Python)
# and shows the documentation for the named object
?range

In [ ]:
# build the list of squares of 0..7 one element at a time
squares = []
for x in range(8):
    s = x ** 2
    squares += [s]

print(squares)

In [ ]:
# using list comprehension: loop and append collapsed into one expression
squares = [x ** 2 for x in range(8)]
print(squares)

In [ ]:
# enumerate: yields (position, element) pairs, with positions starting at 0
letters = list('ACGT')
for index, letter in enumerate(letters):
    print(index, letter)

In [ ]:
# filtering: keep only the cities whose population reaches one million
city_pops = {
    'London': 8200000,
    'Cambridge': 130000,
    'Edinburgh': 420000,
    'Glasgow': 1200000
}

big_cities = []
for city, population in city_pops.items():
    if population >= 1000000:
        big_cities.append(city)

print(big_cities)

In [ ]:
# total population
# either accumulate each value into a running-total variable inside a loop,
# or (as here) add up all the dictionary values at once with sum()
sum(city_pops.values())

In [ ]:
# string formatting: each '{}' is a placeholder filled, in order, by the
# arguments passed to format(); '{:.3f}' renders a float with 3 decimals
rna_dict = {
    "G": 345.21,
    "C": 305.18,
    "A": 329.21,
    "U": 302.16,
}
template = "This base {} has this mass {:.3f}"
for rna in rna_dict:
    print(template.format(rna, rna_dict[rna]))

Exercise 2.2.3

  1. Calculate the GC content of a DNA sequence.
  2. Output every base of the sequence alongside its index on a new line.
  3. Loop over the bases in your sequence to count the Gs and the Cs.
  4. Print the % of GC for this sequence and format the result to display only 2 decimal places.

In [ ]:
seq = 'AAAAAAAATTTTTTCCCCCGGGG'
gc = 0  # running count of G and C bases
for i, s in enumerate(seq):
    # print(i, s)  # uncomment to list every base alongside its index
    if s in ('C', 'G'):
        gc = gc + 1
# print(gc)  # uncomment to inspect the raw count
gc_fraction = gc / len(seq)
print('%GC is {:.2f} for this sequence {}'.format(gc_fraction * 100, seq))