Week 1 - Getting Started


In [33]:
import numpy as np

# Report the NumPy version in use so the outputs below can be reproduced.
print("Numpy:", np.__version__)


Numpy: 1.10.4

Python Summary

Further information

More information is usually available with the help function. Using ? brings up the same information in ipython.

The dir function lists all the attributes (functions, constants, submodules, ...) available on an object.

help(np)

np?

dir(np)


In [4]:
# List every attribute name defined on the numpy module.
dir(np)


Out[4]:
['ALLOW_THREADS',
 'BUFSIZE',
 'CLIP',
 'ComplexWarning',
 'DataSource',
 'ERR_CALL',
 'ERR_DEFAULT',
 'ERR_IGNORE',
 'ERR_LOG',
 'ERR_PRINT',
 'ERR_RAISE',
 'ERR_WARN',
 'FLOATING_POINT_SUPPORT',
 'FPE_DIVIDEBYZERO',
 'FPE_INVALID',
 'FPE_OVERFLOW',
 'FPE_UNDERFLOW',
 'False_',
 'Inf',
 'Infinity',
 'MAXDIMS',
 'MAY_SHARE_BOUNDS',
 'MAY_SHARE_EXACT',
 'MachAr',
 'ModuleDeprecationWarning',
 'NAN',
 'NINF',
 'NZERO',
 'NaN',
 'PINF',
 'PZERO',
 'PackageLoader',
 'RAISE',
 'RankWarning',
 'SHIFT_DIVIDEBYZERO',
 'SHIFT_INVALID',
 'SHIFT_OVERFLOW',
 'SHIFT_UNDERFLOW',
 'ScalarType',
 'Tester',
 'TooHardError',
 'True_',
 'UFUNC_BUFSIZE_DEFAULT',
 'UFUNC_PYVALS_NAME',
 'VisibleDeprecationWarning',
 'WRAP',
 '_NoValue',
 '__NUMPY_SETUP__',
 '__all__',
 '__builtins__',
 '__cached__',
 '__config__',
 '__doc__',
 '__file__',
 '__git_revision__',
 '__loader__',
 '__mkl_version__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_import_tools',
 '_mat',
 'abs',
 'absolute',
 'absolute_import',
 'add',
 'add_docstring',
 'add_newdoc',
 'add_newdoc_ufunc',
 'add_newdocs',
 'alen',
 'all',
 'allclose',
 'alltrue',
 'alterdot',
 'amax',
 'amin',
 'angle',
 'any',
 'append',
 'apply_along_axis',
 'apply_over_axes',
 'arange',
 'arccos',
 'arccosh',
 'arcsin',
 'arcsinh',
 'arctan',
 'arctan2',
 'arctanh',
 'argmax',
 'argmin',
 'argpartition',
 'argsort',
 'argwhere',
 'around',
 'array',
 'array2string',
 'array_equal',
 'array_equiv',
 'array_repr',
 'array_split',
 'array_str',
 'asanyarray',
 'asarray',
 'asarray_chkfinite',
 'ascontiguousarray',
 'asfarray',
 'asfortranarray',
 'asmatrix',
 'asscalar',
 'atleast_1d',
 'atleast_2d',
 'atleast_3d',
 'average',
 'bartlett',
 'base_repr',
 'bench',
 'binary_repr',
 'bincount',
 'bitwise_and',
 'bitwise_not',
 'bitwise_or',
 'bitwise_xor',
 'blackman',
 'bmat',
 'bool',
 'bool8',
 'bool_',
 'broadcast',
 'broadcast_arrays',
 'broadcast_to',
 'busday_count',
 'busday_offset',
 'busdaycalendar',
 'byte',
 'byte_bounds',
 'bytes0',
 'bytes_',
 'c_',
 'can_cast',
 'cast',
 'cbrt',
 'cdouble',
 'ceil',
 'cfloat',
 'char',
 'character',
 'chararray',
 'choose',
 'clip',
 'clongdouble',
 'clongfloat',
 'column_stack',
 'common_type',
 'compare_chararrays',
 'compat',
 'complex',
 'complex128',
 'complex64',
 'complex_',
 'complexfloating',
 'compress',
 'concatenate',
 'conj',
 'conjugate',
 'convolve',
 'copy',
 'copysign',
 'copyto',
 'core',
 'corrcoef',
 'correlate',
 'cos',
 'cosh',
 'count_nonzero',
 'cov',
 'cross',
 'csingle',
 'ctypeslib',
 'cumprod',
 'cumproduct',
 'cumsum',
 'datetime64',
 'datetime_as_string',
 'datetime_data',
 'deg2rad',
 'degrees',
 'delete',
 'deprecate',
 'deprecate_with_doc',
 'diag',
 'diag_indices',
 'diag_indices_from',
 'diagflat',
 'diagonal',
 'diff',
 'digitize',
 'disp',
 'divide',
 'division',
 'dot',
 'double',
 'dsplit',
 'dstack',
 'dtype',
 'e',
 'ediff1d',
 'einsum',
 'emath',
 'empty',
 'empty_like',
 'equal',
 'errstate',
 'euler_gamma',
 'exp',
 'exp2',
 'expand_dims',
 'expm1',
 'extract',
 'eye',
 'fabs',
 'fastCopyAndTranspose',
 'fft',
 'fill_diagonal',
 'find_common_type',
 'finfo',
 'fix',
 'flatiter',
 'flatnonzero',
 'flexible',
 'fliplr',
 'flipud',
 'float',
 'float16',
 'float32',
 'float64',
 'float_',
 'floating',
 'floor',
 'floor_divide',
 'fmax',
 'fmin',
 'fmod',
 'format_parser',
 'frexp',
 'frombuffer',
 'fromfile',
 'fromfunction',
 'fromiter',
 'frompyfunc',
 'fromregex',
 'fromstring',
 'full',
 'full_like',
 'fv',
 'generic',
 'genfromtxt',
 'get_array_wrap',
 'get_include',
 'get_printoptions',
 'getbufsize',
 'geterr',
 'geterrcall',
 'geterrobj',
 'gradient',
 'greater',
 'greater_equal',
 'half',
 'hamming',
 'hanning',
 'histogram',
 'histogram2d',
 'histogramdd',
 'hsplit',
 'hstack',
 'hypot',
 'i0',
 'identity',
 'iinfo',
 'imag',
 'in1d',
 'index_exp',
 'indices',
 'inexact',
 'inf',
 'info',
 'infty',
 'inner',
 'insert',
 'int',
 'int0',
 'int16',
 'int32',
 'int64',
 'int8',
 'int_',
 'int_asbuffer',
 'intc',
 'integer',
 'interp',
 'intersect1d',
 'intp',
 'invert',
 'ipmt',
 'irr',
 'is_busday',
 'isclose',
 'iscomplex',
 'iscomplexobj',
 'isfinite',
 'isfortran',
 'isinf',
 'isnan',
 'isneginf',
 'isposinf',
 'isreal',
 'isrealobj',
 'isscalar',
 'issctype',
 'issubclass_',
 'issubdtype',
 'issubsctype',
 'iterable',
 'ix_',
 'kaiser',
 'kron',
 'ldexp',
 'left_shift',
 'less',
 'less_equal',
 'lexsort',
 'lib',
 'linalg',
 'linspace',
 'little_endian',
 'load',
 'loads',
 'loadtxt',
 'log',
 'log10',
 'log1p',
 'log2',
 'logaddexp',
 'logaddexp2',
 'logical_and',
 'logical_not',
 'logical_or',
 'logical_xor',
 'logspace',
 'long',
 'longcomplex',
 'longdouble',
 'longfloat',
 'longlong',
 'lookfor',
 'ma',
 'mafromtxt',
 'mask_indices',
 'mat',
 'math',
 'matmul',
 'matrix',
 'matrixlib',
 'max',
 'maximum',
 'maximum_sctype',
 'may_share_memory',
 'mean',
 'median',
 'memmap',
 'meshgrid',
 'mgrid',
 'min',
 'min_scalar_type',
 'minimum',
 'mintypecode',
 'mirr',
 'mod',
 'modf',
 'moveaxis',
 'msort',
 'multiply',
 'nan',
 'nan_to_num',
 'nanargmax',
 'nanargmin',
 'nanmax',
 'nanmean',
 'nanmedian',
 'nanmin',
 'nanpercentile',
 'nanprod',
 'nanstd',
 'nansum',
 'nanvar',
 'nbytes',
 'ndarray',
 'ndenumerate',
 'ndfromtxt',
 'ndim',
 'ndindex',
 'nditer',
 'negative',
 'nested_iters',
 'newaxis',
 'nextafter',
 'nonzero',
 'not_equal',
 'nper',
 'npv',
 'numarray',
 'number',
 'obj2sctype',
 'object',
 'object0',
 'object_',
 'ogrid',
 'oldnumeric',
 'ones',
 'ones_like',
 'outer',
 'packbits',
 'pad',
 'partition',
 'percentile',
 'pi',
 'piecewise',
 'pkgload',
 'place',
 'pmt',
 'poly',
 'poly1d',
 'polyadd',
 'polyder',
 'polydiv',
 'polyfit',
 'polyint',
 'polymul',
 'polynomial',
 'polysub',
 'polyval',
 'power',
 'ppmt',
 'print_function',
 'prod',
 'product',
 'promote_types',
 'ptp',
 'put',
 'putmask',
 'pv',
 'r_',
 'rad2deg',
 'radians',
 'random',
 'rank',
 'rate',
 'ravel',
 'ravel_multi_index',
 'real',
 'real_if_close',
 'rec',
 'recarray',
 'recfromcsv',
 'recfromtxt',
 'reciprocal',
 'record',
 'remainder',
 'repeat',
 'require',
 'reshape',
 'resize',
 'restoredot',
 'result_type',
 'right_shift',
 'rint',
 'roll',
 'rollaxis',
 'roots',
 'rot90',
 'round',
 'round_',
 'row_stack',
 's_',
 'safe_eval',
 'save',
 'savetxt',
 'savez',
 'savez_compressed',
 'sctype2char',
 'sctypeDict',
 'sctypeNA',
 'sctypes',
 'searchsorted',
 'select',
 'set_numeric_ops',
 'set_printoptions',
 'set_string_function',
 'setbufsize',
 'setdiff1d',
 'seterr',
 'seterrcall',
 'seterrobj',
 'setxor1d',
 'shape',
 'shares_memory',
 'short',
 'show_config',
 'sign',
 'signbit',
 'signedinteger',
 'sin',
 'sinc',
 'single',
 'singlecomplex',
 'sinh',
 'size',
 'sometrue',
 'sort',
 'sort_complex',
 'source',
 'spacing',
 'split',
 'sqrt',
 'square',
 'squeeze',
 'stack',
 'std',
 'str',
 'str0',
 'str_',
 'string_',
 'subtract',
 'sum',
 'swapaxes',
 'sys',
 'take',
 'tan',
 'tanh',
 'tensordot',
 'test',
 'testing',
 'tile',
 'timedelta64',
 'trace',
 'transpose',
 'trapz',
 'tri',
 'tril',
 'tril_indices',
 'tril_indices_from',
 'trim_zeros',
 'triu',
 'triu_indices',
 'triu_indices_from',
 'true_divide',
 'trunc',
 'typeDict',
 'typeNA',
 'typecodes',
 'typename',
 'ubyte',
 'ufunc',
 'uint',
 'uint0',
 'uint16',
 'uint32',
 'uint64',
 'uint8',
 'uintc',
 'uintp',
 'ulonglong',
 'unicode',
 'unicode_',
 'union1d',
 'unique',
 'unpackbits',
 'unravel_index',
 'unsignedinteger',
 'unwrap',
 'ushort',
 'vander',
 'var',
 'vdot',
 'vectorize',
 'version',
 'void',
 'void0',
 'vsplit',
 'vstack',
 'warnings',
 'where',
 'who',
 'zeros',
 'zeros_like']

Variables

A variable is simply a name for something. One of the simplest tasks is printing the value of a variable.

Printing can be customized using the format method on strings.


In [5]:
location = 'Bethesda'
zip_code = 20892
elevation = 71.9

# 1. Several arguments: print() converts each one to text and joins them with
#    single spaces, which is why extra spaces appear around the comma and
#    before the "m".
print("We're in", location, "zip code", zip_code, ", ", elevation, "m above sea level")
# 2. String concatenation: non-string values must be converted with str() first.
print("We're in " + location + " zip code " + str(zip_code) + ", " + str(elevation) + "m above sea level")
# 3. str.format(): {0}, {1} and {2} are replaced by the positional arguments.
print("We're in {0} zip code {1}, {2}m above sea level".format(location, zip_code, elevation))
# 4. A format spec after the colon controls presentation; .2e means scientific
#    notation with two digits after the decimal point.
print("We're in {0} zip code {1}, {2:.2e}m above sea level".format(location, zip_code, elevation))


We're in Bethesda zip code 20892 ,  71.9 m above sea level
We're in Bethesda zip code 20892, 71.9m above sea level
We're in Bethesda zip code 20892, 71.9m above sea level
We're in Bethesda zip code 20892, 7.19e+01m above sea level

Types

A number of different types are available as part of the standard library. The following links to the documentation provide a summary.

Other types are available from other packages and can be created to support special situations.

A variety of different methods are available depending on the type.


In [1]:
# Built-in container types
# Each print shows the container, its length, and a membership test with `in`.

# Lists: ordered and mutable; duplicate values are kept
l = [1,2,3,4,4]
print("List:", l, len(l), 1 in l)

# Tuples: written with parentheses; duplicate values are kept here too
t = (1,2,3,4,4)
print("Tuple:", t, len(t), 1 in t)

# Sets: hold unique values only, so the repeated 4 is dropped (length 4, not 5)
s = set([1,2,3,4,4])
print("Set:", s, len(s), 1 in s)

# Dictionaries
# Dictionaries map hashable values to arbitrary objects.
# Keys of different types (str, int, float) can be mixed; `in` tests the keys.
d = {'a': 1, 'b': 2, 3: 's', 2.5: 't'}
print("Dictionary:", d, len(d), 'a' in d)


List: [1, 2, 3, 4, 4] 5 True
Tuple: (1, 2, 3, 4, 4) 5 True
Set: {1, 2, 3, 4} 4 True
Dictionary: {'a': 1, 3: 's', 2.5: 't', 'b': 2} 4 True

In [2]:
import random

# Each elif is only evaluated when every earlier condition was false, so the
# second, independent draw splits the remaining 50% of cases into two
# branches of 25% each.
if random.random() < 0.5:
    print("Should be printed 50% of the time")
elif random.random() < 0.5:
    print("Should be printed 25% of the time")
else:
    print("Should be printed 25% of the time")


Should be printed 50% of the time

In [3]:
# A for loop may carry an else clause: it runs once the loop has exhausted
# its iterable without executing a break.
for i in ['a', 'b', 'c', 'd']:
    print(i)
else:
    print('Else')

# continue skips the rest of the body for 'b'; break leaves the loop at 'd',
# so 'd' is never printed and the else clause is skipped as well.
for i in ['a', 'b', 'c', 'd']:
    if i == 'b':
        continue
    elif i == 'd':
        break
    print(i)
else:
    print('Else')


a
b
c
d
Else
a
c

In [4]:
def is_even(n):
    """Return True when ``n`` leaves no remainder on division by 2, else False."""
    remainder = n % 2
    # A zero remainder is falsy, so the conditional maps it to True.
    return False if remainder else True

print(is_even(1), is_even(2))


False True

In [5]:
def first_n_squared_numbers(n=5):
    """Return a list holding the squares of the integers 1 through ``n`` inclusive.

    ``n`` defaults to 5 when the caller does not supply it.
    """
    squares = []
    for value in range(1, n + 1):
        squares.append(value ** 2)
    return squares

print(first_n_squared_numbers())


[1, 4, 9, 16, 25]

In [6]:
def next_fibonacci(status=[]):
    """Append the next Fibonacci number to ``status`` and return it.

    The mutable default argument is intentional here: the default list is
    created once, when the function is defined, so successive calls made
    without an argument share it and continue the same sequence. Pass your
    own list to keep an independent sequence.
    """
    # The first two terms are both 1; every later term is the sum of the
    # two terms before it.
    if len(status) >= 2:
        value = status[-2] + status[-1]
    else:
        value = 1
    status.append(value)
    return value

print(next_fibonacci(), next_fibonacci(), next_fibonacci(), next_fibonacci(), next_fibonacci(), next_fibonacci())


1 1 2 3 5 8

In [6]:
def accepts_anything(*args, **kwargs):
    """Print each positional argument, the type of ``args``, then each keyword pair.

    Positional arguments are collected into ``args`` and keyword arguments
    into the ``kwargs`` mapping.
    """
    for positional in args:
        print(positional)
    print(type(args))
    for name, value in kwargs.items():
        print(name, value)

accepts_anything(1,2,3,4, a=1, b=2, c=3)


1
2
3
4
<class 'tuple'>
c 3
a 1
b 2

In [8]:
# For quick and simple functions a lambda expression can be a useful approach.
# Standard functions are always a valid alternative and often make code clearer.
# A lambda is an anonymous, single-expression function; binding it to a name
# as below makes it callable like any other function.
f = lambda x: x**2
print(f(5))

people = [{'name': 'Alice', 'age': 30},
          {'name': 'Bob', 'age': 35},
          {'name': 'Charlie', 'age': 35},
          {'name': 'Dennis', 'age': 25}]
print(people)
# list.sort reorders the list in place, using the key function's result for
# each item. The sort is stable, so Bob and Charlie (both 35) keep their
# original relative order.
people.sort(key=lambda x: x['age'])
print(people)


25
[{'age': 30, 'name': 'Alice'}, {'age': 35, 'name': 'Bob'}, {'age': 35, 'name': 'Charlie'}, {'age': 25, 'name': 'Dennis'}]
[{'age': 25, 'name': 'Dennis'}, {'age': 30, 'name': 'Alice'}, {'age': 35, 'name': 'Bob'}, {'age': 35, 'name': 'Charlie'}]

In [56]:
a = np.array([[1,2,3], [4,5,6], [7,8,9]])
print(a)
# Slicing: rows 1 onward and columns 1 onward, i.e. the lower-right 2x2 block
print(a[1:,1:])
# Broadcasting a scalar: 2 is added to every element
a = a + 2
print(a)
# Broadcasting a length-3 array: [1, 2, 3] is added to each row
a = a + np.array([1,2,3])
print(a)
# Broadcasting a 3x1 column: 10, 20 and 30 are added to rows 0, 1 and 2
a = a + np.array([[10],[20],[30]])
print(a)


[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[5 6]
 [8 9]]
[[ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
[[ 4  6  8]
 [ 7  9 11]
 [10 12 14]]
[[14 16 18]
 [27 29 31]
 [40 42 44]]

In [59]:
# Mean of all elements, then the per-column means (axis=0) and per-row means (axis=1)
print(a.mean(), a.mean(axis=0), a.mean(axis=1))


29.0 [ 27.  29.  31.] [ 16.  29.  42.]

In [65]:
import matplotlib.pyplot as plt

%matplotlib inline

In [68]:
# 500 evenly spaced sample points from 0 to 6*pi, i.e. three full periods of sine
x = np.linspace(0, 3*2*np.pi, 500)

plt.plot(x, np.sin(x))
plt.show()


Exercises


In [35]:
# Inputs for the exercises listed below: a sentence to transform and a float to format
a = "The quick brown fox jumps over the lazy dog"
b = 1234567890.0
  • Print the variable a in all uppercase
  • Print the variable a with every other letter in uppercase
  • Print the variable a in reverse, i.e. god yzal ...
  • Print the variable a with the words reversed, i.e. ehT kciuq ...
  • Print the variable b in scientific notation with 4 decimal places

In [42]:
print(a.upper())

# Upper-case every other letter. Spaces are copied through without flipping
# the flag, so the alternation continues across word boundaries. Characters
# are collected in a list and joined once, which avoids rebuilding the string
# on every iteration.
upper = True
pieces = []
for i in a:
    pieces.append(i.upper() if upper else i)
    if i != ' ':
        upper = not upper
soln = ''.join(pieces)
print(soln)

def reverse_string(s):
    """Return the characters of ``s`` in reverse order, joined into a new string."""
    return ''.join(list(s)[::-1])

# Whole sentence reversed character by character
print(reverse_string(a))

# Each word reversed on its own, with the words kept in their original order
words = a.split(' ')
print(' '.join(map(reverse_string, words)))

# Scientific notation with four digits after the decimal point
print(format(b, '.4e'))


THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG
ThE qUiCk BrOwN fOx JuMpS oVeR tHe LaZy DoG
god yzal eht revo spmuj xof nworb kciuq ehT
ehT kciuq nworb xof spmuj revo eht yzal god
1.2346e+09

In [53]:
# Unsorted name/age records used by the exercises below; several people share an age
people = [{'name': 'Charlie', 'age': 35},
          {'name': 'Alice', 'age': 30},
          {'name': 'Eve', 'age': 20},
          {'name': 'Gail', 'age': 30},
          {'name': 'Dennis', 'age': 25},
          {'name': 'Bob', 'age': 35},
          {'name': 'Fred', 'age': 25},]
  • Print the items in people as comma separated values
  • Sort people so that they are ordered by age, and print
  • Sort people so that they are ordered by age first, and then their names, i.e. Bob and Charlie should be next to each other due to their ages with Bob first due to his name.

In [55]:
# Plain comma-separated output: one "name,age" record per line
for person in people:
    print(','.join([person['name'], str(person['age'])]))

# Same records with the name left-aligned and padded to 10 characters
for person in people:
    print('{name:<10},{age}'.format(**person))


Charlie,35
Alice,30
Eve,20
Gail,30
Dennis,25
Bob,35
Fred,25
Charlie   ,35
Alice     ,30
Eve       ,20
Gail      ,30
Dennis    ,25
Bob       ,35
Fred      ,25

In [57]:
def key_age(x):
    """Sort key: return the value stored under ``'age'`` in the record ``x``."""
    age = x['age']
    return age

# Single-key sort: people with the same age keep their existing relative order
people.sort(key=key_age)
print(people)

# Two-key sort in one pass: tuples compare element by element, so age decides
# first and the name breaks ties between people of the same age
people.sort(key=lambda person: (key_age(person), person['name']))
print(people)


[{'age': 20, 'name': 'Eve'}, {'age': 25, 'name': 'Dennis'}, {'age': 25, 'name': 'Fred'}, {'age': 30, 'name': 'Alice'}, {'age': 30, 'name': 'Gail'}, {'age': 35, 'name': 'Charlie'}, {'age': 35, 'name': 'Bob'}]
[{'age': 20, 'name': 'Eve'}, {'age': 25, 'name': 'Dennis'}, {'age': 25, 'name': 'Fred'}, {'age': 30, 'name': 'Alice'}, {'age': 30, 'name': 'Gail'}, {'age': 35, 'name': 'Bob'}, {'age': 35, 'name': 'Charlie'}]

In [59]:
# Two-component points used by the distance and route-ordering exercises below
coords = [(0,0), (10,5), (10,10), (5,10), (3,3), (3,7), (12,3), (10,11)]
  • Write a function that returns the first n prime numbers
  • Given a list of coordinates, calculate the total distance covered travelling between all the points in the order given, using the Euclidean distance
  • Given a list of coordinates arrange them in such a way that the distance traveled is minimized (the itertools module may be useful).

In [58]:
def first_n_primes(n):
    """Return a list of the first ``n`` prime numbers in increasing order.

    Candidates are tested by trial division against the primes found so far;
    a candidate that none of them divides is itself prime.
    """
    found = []
    candidate = 2
    while len(found) < n:
        if all(candidate % p != 0 for p in found):
            found.append(candidate)
        candidate += 1
    return found

print(first_n_primes(5))


[2, 3, 5, 7, 11]

In [66]:
def distance(coords):
    """Return the total Euclidean length of the path through ``coords`` in order.

    Each point is indexed at positions 0 and 1. With fewer than two points
    there are no segments and the result is 0.
    """
    total = 0
    for index in range(len(coords) - 1):
        start, end = coords[index], coords[index + 1]
        dx = start[0] - end[0]
        dy = start[1] - end[1]
        total += (dx ** 2 + dy ** 2) ** 0.5
    return total
        
print(distance(coords))

# Sanity checks on hand-computable cases: a horizontal move in each direction
# and a 3-4-5 right triangle.
assert distance([(0,0), (10,0)]) == 10
assert distance([(0,0), (-10, 0)]) == 10
assert distance([(0,0), (3,4)]) == 5


50.55551882981089

In [67]:
import itertools

# Exhaustive search: score every ordering of the points, then rank the
# orderings by path length. This evaluates len(coords)! routes, so it is only
# practical for short lists.
all_options = sorted(
    ((route, distance(route)) for route in itertools.permutations(coords)),
    key=lambda scored: scored[1],
)

print(all_options[0])


(((12, 3), (10, 5), (10, 10), (10, 11), (5, 10), (3, 7), (3, 3), (0, 0)), 25.775638600922246)

In [68]:
# Fix the seed so the same "random" array is generated on every run
np.random.seed(0)
# 10 rows x 20 columns of integers drawn from 0 (inclusive) to 100 (exclusive)
a = np.random.randint(0, 100, size=(10,20))
  • Print the standard deviation of each row in a numpy array
  • Print only the values greater than 90 in a numpy array
  • From a numpy array display the values in each row in a separate plot (the subplots method may be useful)

In [72]:
# Standard deviation over every element of the array (a single number)
print(a.std())
print(a)
# axis=1 gives one standard deviation per row
print(a.std(axis=1))
# Boolean-mask indexing keeps only the elements where the condition holds,
# returned as a flat one-dimensional array
print(a[a>90])


28.5917383172
[[44 47 64 67 67  9 83 21 36 87 70 88 88 12 58 65 39 87 46 88]
 [81 37 25 77 72  9 20 80 69 79 47 64 82 99 88 49 29 19 19 14]
 [39 32 65  9 57 32 31 74 23 35 75 55 28 34  0  0 36 53  5 38]
 [17 79  4 42 58 31  1 65 41 57 35 11 46 82 91  0 14 99 53 12]
 [42 84 75 68  6 68 47  3 76 52 78 15 20 99 58 23 79 13 85 48]
 [49 69 41 35 64 95 69 94  0 50 36 34 48 93  3 98 42 77 21 73]
 [ 0 10 43 58 23 59  2 98 62 35 94 67 82 46 99 20 81 50 27 14]
 [41 58 65 36 10 86 43 11  2 51 80 32 54  0 38 19 46 42 56 60]
 [77 30 24  2  3 94 98 13 40 72 19 95 72 26 66 52 67 61 14 96]
 [ 4 67 11 86 77 75 56 16 24 29 21 25 80 60 61 83 33 32 70 85]]
[ 25.08804496  28.47011591  21.7795202   30.07972739  28.97667165
  28.50697283  31.07008207  23.401923    32.10681392  27.20087315]
[99 91 99 99 95 94 93 98 98 94 99 94 98 95 96]

In [73]:
import matplotlib.pyplot as plt

%matplotlib inline

# One panel per row of `a`, laid out on a 5 x 2 grid of axes
fig, axes = plt.subplots(5, 2, figsize=(16,12))
for row, ax in zip(a, axes.flat):
    ax.plot(row)
plt.show()



In [ ]: