python introduction

analyzing patient data


In [ ]:
import numpy

In [ ]:
# Read the comma-separated text file into a NumPy array (loadtxt produces floats by default).
data = numpy.loadtxt(fname='inflammation-01.csv', delimiter=',')
# A bare name on the last line of a cell makes the notebook display its value.
data

In [ ]:
# IPython magic (not Python): lists the variables defined so far in this session.
%whos

In [ ]:
# print() writes the str() form of the array, whereas a bare "data" echoes its repr.
print(data)

data types


In [ ]:
# type() reports the class of the object that the name "data" refers to.
print(type(data))

In [ ]:
# dtype is the type of the elements stored inside the array (not the type of the array itself).
print(data.dtype)

In [ ]:
# shape is a tuple with the array's size along each axis: (rows, columns) for a 2-D array.
print(data.shape)

indices start at 0

and slices are half-open intervals: i:j includes index i but excludes index j, i.e. [i,j[ (also written [i,j))


In [ ]:
# Two indices, [row, column], both counted from 0.
print('first value in data:', data[0, 0])

In [ ]:
# Rows 0, 1 and 2 (the stop index 3 is excluded); columns from index 36 to the last one.
small = data[:3, 36:]
small

In [ ]:
# Only the LAST expression of a cell is echoed, so this slice on its own displays nothing.
data[:3, 36:]
print('small is:')
print(small)

calculations on arrays of values


In [ ]:
# Arithmetic with a scalar is applied to every element and produces a new array.
doubledata = data * 2.0

In [ ]:
# Show the same corner of both arrays side by side to compare the values.
print('original:')
print(data[:3, 36:])
print('doubledata:')
print(doubledata[:3, 36:])

In [ ]:
# Arithmetic between two arrays of the same shape is done element by element.
tripledata = doubledata + data

In [ ]:
# Same corner as before: rows 0-2, columns 36 onward.
print('tripledata:')
print(tripledata[:3, 36:])

In [ ]:
# No axis given: the mean is taken over every value in the array and is a single number.
numpy.mean(data)

importing modules

what functions (or methods) are available? type the name followed by a dot (e.g. time.), then press Tab
help on functions (or methods): type name? or ?name


In [ ]:
# "time" is a standard-library module; its functions are reached as time.<name>.
import time
# ctime() with no argument returns the current local time as a readable string.
print(time.ctime())

In [ ]:
time.
?time.strptime

In [ ]:
# IPython help syntax: a trailing "?" displays the documentation of time.strftime.
time.strftime?

In [ ]:
# Parse a 12-hour timestamp. %I (12-hour clock) is required for %p to take effect:
# with %H the "PM" marker is read but ignored, and the hour comes out as 7 instead of 19.
t1 = time.strptime("08/12/18 07:26:34 PM", '%m/%d/%y %I:%M:%S %p')
print(t1)
t2 = time.mktime(t1) # in seconds since 1970 (t1 is interpreted as local time)
print(t2/60/60/24)   # printed: in days since 1970 (seconds -> minutes -> hours -> days)
# Format the parsed time back into a string; as the last expression it is echoed by the cell.
time.strftime('%Y-%m-%d', t1)

In [ ]:
# The help syntax also works on methods of an object, here the "any" method of the array.
data.any?

In [ ]:
# Display the documentation of numpy.mean (IPython help syntax).
numpy.mean?

In [ ]:
# Multiple assignment: the three values on the right are assigned, in order, to the three names.
# With no axis argument each statistic is computed over the whole array.
maxval, minval, stdval = numpy.max(data), numpy.min(data), numpy.std(data)

print('maximum inflammation:', maxval)
print('minimum inflammation:', minval)
print('standard deviation:', stdval)

In [ ]:
patient_0 = data[0, :] # 0 on the first axis, everything on the second
# max() is also available as a method of the array itself.
print('maximum inflammation for patient 0:', patient_0.max())

In [ ]:
# Check what kind of object a single selected row is.
type(patient_0)

In [ ]:
# The row can be selected inline, without storing it in a variable first.
print('maximum inflammation for patient 2:', numpy.max(data[2, :]))

In [ ]:
# axis=0 collapses the first axis (rows): one mean per column.
print(numpy.mean(data, axis=0))

In [ ]:
# The result is 1-D, with as many entries as data has columns.
print(numpy.mean(data, axis=0).shape)

In [ ]:
# axis=1 collapses the second axis (columns): one mean per row.
print(numpy.mean(data, axis=1))

In [ ]:
# pyplot is matplotlib's plotting interface.
import matplotlib.pyplot
# imshow draws the 2-D array as an image, one coloured cell per value.
image  = matplotlib.pyplot.imshow(data)
# If the figure does not appear in the notebook, run the magic "%matplotlib inline" once beforehand.
matplotlib.pyplot.show()

In [ ]:
# Column-wise mean (axis=0), drawn as a line: x is the column index, y is the mean.
ave_inflammation = numpy.mean(data, axis=0)
ave_plot = matplotlib.pyplot.plot(ave_inflammation)
matplotlib.pyplot.show()

In [ ]:
# Column-wise maximum, computed inline inside the plot call.
max_plot = matplotlib.pyplot.plot(numpy.max(data, axis=0))
matplotlib.pyplot.show()

In [ ]:
# Column-wise minimum, computed inline inside the plot call.
min_plot = matplotlib.pyplot.plot(numpy.min(data, axis=0))
matplotlib.pyplot.show()

In [ ]:
import numpy
import matplotlib.pyplot

data = numpy.loadtxt(fname='inflammation-01.csv', delimiter=',')

# Overall extremes, used to give the three panels the same y-range.
datamin = numpy.min(data)
datamax = numpy.max(data)

fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))

# add_subplot is a method for a Figure object: (rows, columns, position)
axes1 = fig.add_subplot(1, 3, 1)
axes2 = fig.add_subplot(1, 3, 2)
axes3 = fig.add_subplot(1, 3, 3)

# One entry per panel: axes, y-label, column-wise statistic, extra options for plot().
panels = [
    (axes1, 'average', numpy.mean, {}),
    (axes2, 'max', numpy.max, {}),
    (axes3, 'min', numpy.min, {'drawstyle': 'steps-mid'}),
]
for axes, ylabel, statistic, plot_options in panels:
    axes.set_ylabel(ylabel)
    axes.set_ylim(datamin, datamax+0.1)
    axes.plot(statistic(data, axis=0), **plot_options)

# Adjust the spacing so that labels of neighbouring panels do not overlap.
fig.tight_layout()

matplotlib.pyplot.show()

stacking arrays


In [ ]:
import numpy

# 3 x 3 matrix holding 1..9, filled row by row
A = numpy.arange(1, 10).reshape(3, 3)
# hstack glues the arrays side by side (more columns) ...
B = numpy.hstack([A, A])
# ... vstack piles them on top of each other (more rows)
C = numpy.vstack([A, A])

print('A = ')
print(A)
print('B = ')
print(B)
print('C = ')
print(C)

slicing strings

notation i:j means the half-open interval [i,j[: index i is included, index j is excluded. Indices start at 0, as for arrays


In [ ]:
# Multiple assignment: names on the left receive the values on the right, in order.
first, second = 'Grace', 'Hopper'
third, fourth = second, first
print(third, fourth)

In [ ]:
element = 'oxygen'
# 0:3 selects the characters at indices 0, 1, 2; 3:6 selects indices 3, 4, 5.
print('first three characters:', element[0:3])
print('last three characters:', element[3:6])

In [ ]:
# An omitted start means "from the beginning"; an omitted stop means "to the end".
print(element[:4])
print(element[4:]); print(element[:])

In [ ]:
# Start equal to stop: the slice contains nothing, so the result is the empty string.
element[3:3]

In [ ]:
# Same rule for arrays: empty ranges on both axes give an array with no elements.
data[3:3, 4:4]

In [ ]:
# No rows selected but every column: the result is still an empty array.
data[3:3, :]

In [ ]:
word = 'lead'
# Looping over a string visits its characters one at a time, in order.
for char in word:
    print(char)

for loop syntax

for variable in collection:
    statement 1 to do something
    statement 2 to do another thing
    statement 3, indented similarly

In [ ]:
# Count the characters of 'aeiou' by adding 1 on every pass through the loop.
length = 0
for vowel in 'aeiou':
    length = length + 1
    # NOTE(review): this print is inside the loop, so the message appears once per
    # letter with the running count. Dedent it if only the final total is wanted.
    print('There are', length, 'vowels')

important: variables created inside the for loop still exist outside after the loop is finished


In [ ]:
# The loop variable keeps the last value it was given.
print('The variable "vowel" still exists: equals', vowel)

In [ ]:
# The built-in len() gives the same count without writing a loop.
print(len('aeiou'))

In [ ]:
# range(1, 40) runs from 1 to 39: the stop value is excluded.
# end=" " replaces the newline that print normally adds, keeping the numbers on one line.
for i in range(1, 40):
    print(i, end=" ")

In [ ]:
# range is a type of its own: it produces its values on demand instead of storing a list.
print(type(range(1,40)))
range(3,1000)

In [ ]:
# Third argument = step: start at 3, go up by 4, stop before 15 (so 3, 7, 11).
for i in range(3, 15, 4):
    print(i)

In [ ]:
# ** is the power operator: 5 to the power 3.
print(5 ** 3)

In [ ]:
# Same power computed by hand: start from 1 and multiply by 5 three times.
result = 1
for i in range(3):
    result *= 5
print(result)

reverse a string


In [ ]:
oldstring = 'Newton'
newstring = ''
for char in oldstring:
    # Put each character IN FRONT of what has been built so far,
    # so the first character of oldstring ends up last.
    newstring = char + newstring
print(newstring)

storing multiple values in a list


In [ ]:
# Whitespace between the items of a list is not significant.
odds = [1,3,    5,      7]
print('odds are:', odds)
# Index 0 is the first item; index -1 counts from the end, so it is the last item.
print('first and last:', odds[0], odds[-1])
for number in odds:
    print(number)
    
names = ['Newton', 'Darwing', 'Turing'] # typo in Darwin's name
print('names is originally:', names)
# Lists are mutable: an item can be replaced in place through its index.
names[1] = 'Darwin' # correct the name
print('final value of names:', names)

In [ ]:
name = 'Darwin'
print("letter indexed 0:", name[0])
# Reading a character by index works, but strings are immutable: assigning to an index
# raises TypeError. The error is caught and printed so that "Run All" does not stop here.
try:
    name[0] = 'd'
except TypeError as err:
    print("TypeError:", err)

In [ ]:
# To "change" a string, bind the name to a new string instead.
name = "darwin"
name

mutable and immutable objects

  • "name" contains the string 'Darwin', and strings are immutable.
  • lists and arrays are mutable. Functions that operate on them can change them in place

In [ ]:
a = "Darwin"
b = a # b and a now refer to the same string
print("b=",b)
b = "Turing" # rebinds the name b to a new string; a still refers to "Darwin"
print("now b=",b,"\nand a=",a)

In [ ]:
a = [10,11]
b = a # b is a second name for the SAME list object, not a copy
print("b[1]=", b[1])
b[1] = 22 # changes the value that b binds to, so changes a too
print("b=", b, "\nand a=",a)

In [ ]:
import copy
a = [10,11]
b = copy.copy(a) # b is a new list holding the same items as a
print("b[1]=", b[1])
b[1] = 22 # changes only the copy: a keeps its original items
print("b=", b, "\nand a=",a)

A list of lists is not the same as an array


In [ ]:
# A list whose items are themselves lists.
x = [['pepper', 'zucchini', 'onion'],
     ['cabbage', 'lettuce', 'garlic'],
     ['apple', 'pear', 'banana']]
print(x)
print(x[0])    # the first inner list
print(x[0][0]) # the first item of the first inner list
print([x[0]])  # a new one-item list that contains the first inner list

deep copy versus simple copy


In [ ]:
# Relies on "import copy" having been run in an earlier cell.
a = [[10,11],[20,21]]
print(a)
b = copy.copy(a) # shallow copy: a new outer list that shares the inner lists with a
b[0][0] = 50 # modifies an inner list that a also contains, so a changes too
print("b=",b,"and a=",a)
b[0] = [8,9] # replaces a slot of b's own outer list: a is not affected
print("b=",b,"and a=",a)
b = copy.deepcopy(a) # deep copy: the inner lists are copied as well
print("now b is back to a: ",b)
b[0][0] = 8 # a is unaffected, nothing is shared any more
print("b=",b,"and a=",a)

more on mutable versus immutable objects: functions can change mutable arguments in place. This is a huge deal!


In [ ]:
def add1_scalar(x):
    """Add 1 to a scalar (number) argument and print the result.

    For an immutable value such as an int, ``x += 1`` rebinds the local name
    ``x`` to a new object, so the caller's variable is left untouched.
    Nothing is returned.
    """
    x += 1
    print("after add1_scalar:",x)

def add1_array(x):
    """Add 1 to the first element of a mutable sequence (list or array) and print it.

    The item assignment modifies the very object the caller passed in, so the
    change remains visible after the call. Nothing is returned.
    """
    x[0] += 1
    print("after add1_array:",x)

a=5; print(a)
add1_scalar(a)
print("and now a =",a) # a was not modified because it is immutable
b=[5]; print(b)
add1_array(b)
print("and now b =",b) # b was modified in place because it is mutable: list

In [ ]:
# The help syntax works on user-defined functions too: it displays their docstring.
add1_scalar?

functions can change mutable arguments in place:

  • beware: a function can silently change a list or array that the caller still uses
  • opportunities to save a lot of memory (and time)

how to modify lists


In [ ]:
print('odds before:', odds)
# append() modifies the list in place. Re-running this cell appends 11 again.
odds.append(11)
print('odds after adding a value:', odds)

for R users, the following code does not do what you might think:


In [ ]:
# This builds a NEW two-item list: the old list (as one nested item) and 11.
# It does not append 11 to the existing items.
odds = [odds, 11]
print('odds=',odds)

In [ ]:
odds = [1, 3, 5, 7, 11]
del odds[0] # removes the item at index 0, in place
print('odds after removing the first element:', odds)
odds.reverse() # reverses in place (the method itself returns None)
print('odds after reversing:', odds)
a = odds.pop() # removes the last item AND returns it
print('odds after popping last element:', odds)
print("this last element was",a)

more on lists and strings

  • splitting a string into a list
  • substitutions

In [ ]:
taxon = "Drosophila melanogaster"
# Slicing by position only works here because the genus is exactly 10 characters long.
genus = taxon[0:10]
print("genus:", genus)

species = taxon[11:] # index 10 is the space, so start right after it
print("species:", species)

# split() cuts at the separator whatever the word lengths are, and returns a list.
gslist = taxon.split(' ')
print(gslist)
# sep="" removes the space that print normally inserts between its arguments.
print("after splitting at each space: genus=",
      gslist[0],", species=",gslist[1], sep="")

In [ ]:
print(taxon)
# replace() returns a new string; the original cannot be modified (strings are immutable).
print(taxon.replace(' ','_'))
print(taxon) # has not changed

In [ ]:
# \t is a tab and \n a newline; echoing the name shows these escapes literally.
mystring = "\t hello world\n \n"
mystring

In [ ]:
# The quotes added around each value make the leading and trailing whitespace visible.
print('here is mystring: "' + mystring + '"')
print('here is mystring.strip(): "' + mystring.strip() + '"') # whitespace removed at both ends
print('here is mystring.rstrip(): "' + mystring.rstrip() + '"') # tRailing only
"     abc\n \n\t ".strip()

In [ ]:
chromosomes = ["X", "Y", "2", "3", "4"]
autosomes = chromosomes[2:5] # items at indices 2, 3, 4
print("autosomes:", autosomes)

last = chromosomes[-1]
print("last:", last)
last = 21
print("last:", last)
chromosomes # unchanged: "last = 21" only rebinds the name "last"; it never touches the list

In [ ]:
a = "Observation date: 02-Feb-2013"
b = [["fluorine", "F"], ["chlorine", "Cl"], ["bromine", "Br"], ["iodine", "I"], ["astatine", "At"]]
# A negative start counts from the end: the last 4 characters, and the last 2 items.
print(a[-4:])
print(b[-2:])

In [ ]:
months = ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"]
# Three equivalent ways to take everything from index 10 to the end of this 12-item list.
print("10:12 gives:", months[10:12])
print("10:len(months) gives:", months[10:len(months)])
print("10: gives", months[10:])

tuples

tuples are immutable, unlike lists. They are useful for:

  • array sizes: (60,40) earlier
  • types of arguments to functions: like (float64, int64) for instance
  • functions can return multiple objects in a tuple
  • a tuple with a single value, say 6.5, is noted like this: (6.5,)
  • they come in very handy for exchanges:

In [ ]:
left = 'L'
right = 'R'

# Exchange the two values the long way, going through a temporary variable.
temp = left
left = right
right = temp
print("left =",left,"and right =",right)

In [ ]:
left = 'L'
right = 'R'

# The right-hand tuple is built first, then unpacked into the names on the left.
(left, right) = (right, left)
print("left =",left,"and right =",right)

In [ ]:
# The parentheses are optional: this swaps the two values back again.
left, right = right, left
print("now left =",left,"and right =",right)

adding and multiplying lists

and remember that lists are mutable


In [ ]:
odds = [1, 3, 5, 7]
primes = odds # a second name for the same list, not a copy
primes += [2] # += extends the list in place, so odds shows the new item too
print('primes:', primes)
print('odds:', odds)

use list to copy (but not deep-copy):


In [ ]:
odds = [1, 3, 5, 7]
primes = list(odds) # a new list with the same items
primes += [11] # extends only the copy
print('primes:', primes)
print('odds:', odds)

a = [[10,11],[20,21]]
b = list(a) # shallow copy: the inner lists are still shared with a
b[0][0] = 50 # so this change shows up in a as well
print("b=",b,"\na=",a)

In [ ]:
# Extends odds in place. Re-running this cell adds 9 and 11 again.
odds += [9,11]
print("add = concatenate for lists: odds =", odds)

In [ ]:
counts = [2, 4, 6, 8, 10]
# list * 2 is a new list with the items repeated twice; the values are NOT doubled.
repeats = counts * 2
print("multiply = repeat for lists:\n", repeats)

operator overloading: the same function does different things depending on its arguments.

here: + and * add and multiply numbers (element by element for arrays), but concatenate and repeat lists


In [ ]:
# sorted() returns a new sorted list and leaves its argument untouched.
print(sorted(repeats))    # all integers
print(sorted([10,2.5,4])) # all numerical
print(sorted(["jan","feb","mar","dec"]))  # all strings
# Strings and numbers cannot be ordered against each other, so sorted() raises TypeError.
# The error is caught and printed so that "Run All" does not stop here.
try:
    print(sorted(["jan",20,1,"dec"]))
except TypeError as err:
    print("TypeError:", err)

list comprehension

general syntax: [xxx for y in z], where xxx is typically some function of y.
shortcut that executes a for loop on one line. here is one example:


In [ ]:
# Builds a new list holding num+5 for every num in counts; counts itself is unchanged.
[num+5 for num in counts]