In [45]:
# Integer, Int: whole numbers, written without a decimal point
print 7
print -7 # a leading minus sign makes the value negative
In [46]:
# Float, Single, Double, Real, Floating Point, etc
print 7. # a trailing decimal point is enough to make this a float; prints 7.0
print 7.0
print 7.5
print -7.5
In [47]:
# Boolean: the two truth values, spelled with a capital first letter
print True
print False
In [48]:
# Strings
print "Hello world!" # you can use double quotes
print 'Hello world!' # or single quotes
print """Hello world!""" # or triple-double quotes
print '''Hello world!''' # or triple-single quotes
# If you need to include a quote character in your string, define the string
# with a different type of quote than the one you want to include
print """"Hello "world!""" # triple-double-quoted, so the single " characters inside are printed literally
# But some combinations will confuse the parser: here the text ends with a "
# that would otherwise run straight into the closing triple quote...
print """"Hello! This is Mike's world!\""""
# ...so you can always "escape" a quote with a backslash (\)
In [49]:
# String formatting
# Substitute placeholders in your string with a value
# The % operator fills the placeholder in the string on its left with the value on its right
print "Hello %s"%"World!" # %s: string
print "This is my %dth time greeting the world."%99 # %d: integer
print "It gets %f times better each time I do!"%4.58273 # %f: float (six decimal places by default)
print "But it only matters that its %.2f times better."%4.58273 # %.2f: float rounded to two decimal places
print "Thats significant with a p-value of %g!"%0.000000000825 # %g: switches to scientific notation for very small or large values (8.25e-10 here)
In [50]:
# Strings are a sequence of "characters"
# Each character has an associated numeric value
# The mapping between characters and numbers is called a "character encoding"
# This is the ASCII character encoding table
from IPython.display import Image
Image(filename='../images/ascii.jpg') # build an image object from the file so the notebook can show it
# UTF-8 is the most common encoding, and is backwards compatible with ASCII
Out[50]:
In [51]:
# Mathematical operators
# (separating items with a comma in a print statement prints them with a space in between)
print 'add',7 + 7
print 'sub',7 - 7
print 'mul',7 * 7
print 'div',7 / 7 # both operands are integers, so the result is the integer 1
print 'pow',pow(7,7) # 7 raised to the 7th power
print 'abs',abs(-7) # absolute value
In [52]:
# Assignment operator: the name on the left is bound to the value on the right
a = 7
b = 8
print 'a ',a
print 'b ',b
c = a + b
print 'c ',c
# Note: Primitive assignment (remember, we'll contrast this with reference types later)
a = b
print 'a=b',a
print 'b ',b
b = 1
print 'a ',a
print 'b=1',b
# Assigning a new value to b afterwards does not affect a: a still holds the 8 it received from b
In [53]:
# Modified assignment operators
# `a OP= n` is shorthand for `a = a OP n`
a = 5
print 'a = 5 =>',a
a += 3
print 'a += 3 =>',a
a -= 5
print 'a -= 5 =>',a
a *= 3
print 'a *= 3 =>',a
a /= 5
print 'a /= 5 =>',a
# Note: 9 / 5 just gave me 1! (a and 5 are both integers, so this was integer division)
In [54]:
# Integer division, modulo, and casting
# If all arguments to the division operator are integers, it will perform integer division
print '9 / 5 =>',9 / 5
# Modulo gives me the remainder
print '9 % 5 =>',9 % 5
# If I want to get a floating point result, at least one argument needs to be floating point
print '9. % 5 =>',9. / 5
# And if these are stored in variables, I need to explicitly cast one to floating point
a,b = 9,5
print 'a / float(b) =>',a/float(b)
In [55]:
# Conditional/Boolean operators
# These return a "boolean" result (True/False)
print 7 == 7 # equal to
print 7 != 7 # not equal to
print 7 < 7 # less than
print 7 > 7 # greater than
print 7 <= 7 # less than or equal to
print 7 >= 7 # greater than or equal to
In [56]:
# Explicit conversion to/from integer
print int(True) # 1
print int(False) # 0
print bool(0) # False
print bool(1) # True
In [57]:
# Implicit conversion to/from integer
print 7 + (7==7) # True is treated as 1 in arithmetic, so this prints 8
# A boolean can also be used as a list index: False selects index 0, True selects index 1
a = ['False','True']
print a[7==7]
print a[7!=7]
In [58]:
# Boolean negation
# The comparison is evaluated first, then `not` flips its result
print not 7 == 7
print not 7 != 7
In [59]:
# Conditional operators and floating point values
# Binary is not the best way to store floating point values
# Occassionally, a floating point value will *slightly* lose precision
# This can cause two values that *should be equivalent* to evaluate to False
# To avoid this, compare floating point numbers with tolerance
a = 0.283474739
b = 0.283474739
print abs(a-b) < 0.0001
# We will discuss this more on the board
In [60]:
# Lists, Arrays
# Four ways to produce the same list of the integers 0 through 9:
print [0,1,2,3,4,5,6,7,8,9] # written out as a literal
print range(0,10,1) # range(start, stop, step); stop itself is not included
print range(10) # start defaults to 0 and step defaults to 1
print [a for a in xrange(10)] # a list comprehension
# Note: python (and most programming languages) count from 0
# Note: python does not have arrays by default, we will discuss the difference on the board
In [61]:
# Tuples (special cases, we'll discuss their usefulness later)
print (0,1,2,3,4,5,6,7,8,9) # a tuple literal is written with parentheses instead of brackets
print tuple(range(10)) # tuple() converts a list into a tuple
In [62]:
# So why are these called reference types?
a = range(10)
b = a # no new list is created here: b becomes a second name for the same list
print 'a',a
print 'b',b
a[5] = "VALUE CHANGED IN A"
print 'a',a
print 'b',b # the value has also been changed in B
# I will explain *why* on the board: `b = a` did not copy the list at all,
# so a and b refer to one shared list object (this is known as "aliasing")
In [63]:
# Lists of Lists!
a = [[1,2],[3,4,5],[6,7,8,9]] # the inner lists do not have to be the same length
print 'a',a
# b is a list whose first item is a (itself a list of lists) and whose second item is the integer 10
b = [a,10]
print 'b',b
In [64]:
# If..then..elif...else
if 7 == 8:
print "Impossible"
elif 7 < 7:
print "Still absurd!"
else:
print "All of that was nonsense!"
In [65]:
# Loops
a = 7
for i in xrange(10):
if i == a:
print "i is 7!"
elif i < 7:
print "i is less than 7!"
else:
print "i *must be* greater than 7" # because it failed the other tests
In [66]:
# Constructing lists with loops (the WRONG way)
a = []
for i in xrange(10):
a.append(i)
print a
In [67]:
# Constructing lists with list comprehensions (the RIGHT way)
# I'll explain *why* it's the right way on the board
a = [i for i in xrange(10)]
print a
# We can also use list comprehensions to subset an existing list
b = [i for i in a if i <=7] # keep only the items of a that pass the test
print b
In [68]:
# method, function, routine, subroutine, etc are essentially synonymous in python
# name parameter list
def foo(param1,param2="bar",param3=None):
print param1
print param2
if param3:
print param3
foo('a') # 'a' is your *argument* to the parameter *param1*; param2 and param3 keep their defaults
print '--'
foo('a','b') # I can overwrite the default for param2 by passing a second *positional* argument
print '--'
foo('a',param3='c') # I can also skip param2 by passing a *keyword* argument to param3
In [69]:
# That method is pretty useless though, typically methods *return* values
def f(x):
    """Return x plus x squared."""
    return x + pow(x,2)
print f(1) # 1 + 1 squared = 2
print f(2) # 2 + 2 squared = 6
In [70]:
# I can use my methods anywhere, for example: list comprehension
a = [f(i) for i in xrange(10)] # call f on each of 0 through 9 and collect the results
print a
In [71]:
# Note: You can treat a method-call as you would the value it returns
print type(6)
print type(f(2)) # f(2) is evaluated first, so this is the type of the value it returned
# Not to be confused with the method itself, however:
print type(f) # no parentheses, so f is not called; this is the type of the function object
In [72]:
# Generators return a sequence of values, but not a list
# The `range` method returns a list
print range(10)
# The `xrange` method does not build a list; it returns an xrange object that
# produces its values one at a time on request, the way a generator does
print xrange(10) # prints the xrange object itself, not the values
In [73]:
# Generators are memory-efficient, because they don't compute everything at once
# Values are retrieved from a generator by iteration
for i in xrange(10):
print i,
print ''
# ...or you can build a list
print [i for i in xrange(10)]
# ...or you can let the list method iterate for you
print list(xrange(10))
In [74]:
# Open a single-column file and read its contents into a list
# Note that the object returned by open() is being assigned to `fin`
# The `with` command creates a new scope, and when variables leave scope, they're destroyed
# For open file objects, part of their default destruction is calling fin.close()
# This syntax is a good way to never forget to close a file
with open('../data/sample.txt','rb') as fin: # <- fin is a generator that iterates over the file
x = [line.strip() for line in fin]
print x
In [75]:
# Open a multi-column, tab-delimited file nad read its contents into a 2D list (list of lists)
# We can do it manually...
with open('../data/sample2.txt','rb') as fin:
x = [line.strip().split('\t') for line in fin]
print x
In [76]:
# ...or we can import libraries to help us out
import csv
with open('../data/sample2.txt','rb') as fin:
reader = csv.reader(fin,delimiter='\t')
x = [row for row in reader]
print x
In [77]:
# First, lets separate our header row from the rest
with open('../data/sample2.txt','rb') as fin:
h = fin.readline().strip().split('\t') # fin is a generator, so we can manually request the first iteration
reader = csv.reader(fin,delimiter='\t') # and then let reader call the rest
x = [row for row in reader]
print h
print x
In [78]:
# But our values are being stored as strings, not integers
# No worries, we can fix that
with open('../data/sample2.txt','rb') as fin:
h = fin.readline().strip().split('\t')
reader = csv.reader(fin,delimiter='\t')
x = [[int(r) for r in row] for row in reader]
print h
print x
In [79]:
# We can also transpose 2D lists with a little magic
# *x hands each row of x to zip as a separate argument; zip then groups together
# the first items of every row, then the second items, and so on
print zip(*x)
In [80]:
# But now our internal lists aren't lists, they're tuples
# No worries, we can avoid that with a simple list comprehension
print [list(i) for i in zip(*x)] # list() turns each tuple back into a list
When we call open(fname,'rb') we are creating a file object, which behaves like a generator: each iteration hands back the next line of the file. csv.reader takes such an object as input, and can itself be iterated in the same way. It reads each line, does some processing for us, and then passes that line along to us as a processed "row" of data. When we call fin.readline() we're manually advancing by one line, which consumes the first row (the header). When we then pass fin to csv.reader, the position in the file is not reset, so the first line csv.reader requests from fin (internally) is the 2nd row.
At first glance, understanding references looks like the type of thing you never wanted to learn from that one CS class someone made you take one time. They are incredibly powerful tools, however. Not only is much of the data you work with in python stored by reference, but reference types also have a major impact on data passed to method calls. You can also exploit references in your own code to save space and create more intricate object interactions.
Yes. Yes they are. They are also very helpful, and it is easy to write your own! Do you have two different file types you'd like to accept as input? Write two generators that each read a different file type, but convert the data into a single format. Now you can use a single set of methods to process the data, because your generators are doing all the file parsing.
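Also confusing. * is an operator that means "unlist": it passes each item of a list to a method as a separate argument. zip is a method that "zips together" two or more lists, grouping the items found at the same position in each. On their own, they're confusing to work with, but just remember that if you have a 2D list, zip(*x) will transpose that list.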
Check the documentation. If you don't understand the documentation, ask us!
Google what you want! Python has a huge user base and StackOverflow has excellent answers to the vast majority of beginner (and advanced) questions. Don't be too specific, think about what you want to accomplish in general terms. Use vocabulary from this notebook. If you can't find your answer there, ask us!
Awesome. I don't know ahead of time what you'll find interesting, so tell me what you'd like to hear more about.
In [80]: