Resources

For further information and tutorials see:

Variables


In [2]:
# the = symbol is the assignment operator: the value on the right is stored
# under the variable name on the left
name = 'Lina'  # a string (text) value, written inside quotes
year = 2016    # an integer (whole number) value

In [3]:
# we can then see what our variable is holding using the print() function
print(name)
# we can check the type of our variable using the type(variable_name) function
print(type(year))


Lina
<class 'int'>

In [4]:
# you must assign a variable before you call it, otherwise an error will occur
print(age)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-27506fe65432> in <module>()
      1 # you must assign a variable before you call it, otherwise an error will occur
----> 2 print(age)

NameError: name 'age' is not defined

Lists


In [6]:
fruits = ['apple', 'banana', 'mango', 'lychee']

In [7]:
print(fruits)


['apple', 'banana', 'mango', 'lychee']

In [10]:
# .append() adds a single item to the end of the list, changing the list in place
# careful: every run of this cell adds another 'orange' -- the outputs below
# contain three of them, which suggests the cell was run three times
fruits.append('orange')

In [11]:
print(fruits)


['apple', 'banana', 'mango', 'lychee', 'orange', 'orange', 'orange']

In [13]:
# the items in a list do not all have to be the same type:
# here an integer, a string and another list (fruits) are stored together
misc = [29, 'dog', fruits]

In [14]:
print(misc)


[29, 'dog', ['apple', 'banana', 'mango', 'lychee', 'orange', 'orange', 'orange']]

Indexing and Slicing


In [15]:
#indexing in Python starts at 0
print(fruits[0])


apple

In [ ]:
print(fruits[1])

In [16]:
s = 'This is a string.'

In [17]:
print(s[0])


T

In [18]:
# negative indices count from the end of the sequence:
# -1 is the last element, -2 is the one before it, and so on
print(fruits[-1])


orange

In [19]:
print(fruits[-2])


orange

In [20]:
# to get a slice of the string use the : symbol, written as s[start:stop]
# the character at position `start` is included, the one at `stop` is not,
# so s[0:4] returns the four characters at positions 0, 1, 2 and 3
print(s[0:4])


This

In [21]:
# leaving out the start index means "from the beginning", so this equals s[0:4]
print(s[:4])


This

In [22]:
print(s[4:7])


 is

In [23]:
# leaving out the stop index means "up to the end of the string",
# which gives the same result as using len(s) as the stop index
print(s[7:])
print(s[7:len(s)])


 a string.
 a string.

If Statements


In [24]:
s2 = [19034, 23]

# You will always need to start with an 'if' line
# The elif and else parts are optional
# You can have as many elif statements as needed
# The conditions are tested from top to bottom and only the first match runs

# isinstance(value, some_type) is the recommended way to test an object's type
if isinstance(s2, str):
    print('s2 is a string')
elif isinstance(s2, int):
    print('s2 is an integer')
elif isinstance(s2, float):
    print('s2 is a float')
else:
    # s2 is a list here, so none of the tests above match and this branch runs
    print('s2 is not a string or integer')


s2 is not a string or integer

For Loops


In [25]:
nums = [23, 56, 1, 10, 15, 0]

In [26]:
# in this case, 'n' is the loop variable: on each pass through the loop it is
# set to the next item of nums, so you do not need to assign it ahead of time

for n in nums:
    # % gives the remainder of a division; a remainder of 0 after dividing by 2 means even
    if n%2 == 0:
        print('even')
    else:
        print('odd')


odd
even
odd
even
odd
even

In [27]:
# for loops can iterate over strings as well:
# the loop variable receives one character of the string at a time
vowels = 'aeiou'
for vowel in vowels:
    print(vowel)


a
e
i
o
u

Functions


In [30]:
# always use descriptive naming for functions, variables, arguments etc.
def sum_of_squares(num1, num2):
    """Return the sum of the squares of the two arguments.

    Args:
        num1: the first number.
        num2: the second number.

    Returns:
        num1 squared plus num2 squared.
    """
    first_square = num1**2
    second_square = num2**2
    return first_square + second_square

In [31]:
print(sum_of_squares(4,2))


20

In [32]:
# the return statement in a function allows us to store the output of a function call in a variable for later use
ss1 = sum_of_squares(5,5)

In [33]:
print(ss1)


50

Useful Packages


In [34]:
# use a package by importing it, you can also give it a shorter alias, in this case 'np'
import numpy as np

In [38]:
# np.arange(15) builds a numpy array holding the integers 0 to 14
array = np.arange(15)
# list(range(15)) builds an ordinary Python list holding the same values
lst = list(range(15))

In [39]:
print(array)
print(lst)


[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

In [40]:
print(type(array))
print(type(lst))


<class 'numpy.ndarray'>
<class 'list'>

In [41]:
# numpy arrays allow for vectorized calculations:
# array*2 multiplies every element by 2,
# whereas lst*2 does no arithmetic at all -- it repeats the whole list twice
print(array*2)
print(lst*2)


[ 0  2  4  6  8 10 12 14 16 18 20 22 24 26 28]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

In [42]:
# rearrange the 15 values into 5 rows and 3 columns
array = array.reshape([5,3])
print(array)


[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]

In [43]:
# mean of each row: axis=1 averages across the columns, giving one value per row
array.mean(axis=1)


Out[43]:
array([  1.,   4.,   7.,  10.,  13.])

In [44]:
# max value in each column: axis=0 works down the rows, giving one value per column
array.max(axis=0)


Out[44]:
array([12, 13, 14])

In [4]:
import pandas as pd

In [5]:
# read the csv file into a pandas DataFrame
# this csv has data of country spending on healthcare
data = pd.read_csv(
    'health.csv',
    header=0,               # the first line of the file supplies the column names
    index_col=0,            # the first column supplies the row labels
    encoding="ISO-8859-1",  # text encoding used to decode the file
)

In [6]:
# the .head() function will allow us to look at first few lines of the dataframe
data.head()


Out[6]:
1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
Total expenditure on health as percentage of GDP (gross domestic product)
Abkhazia NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Afghanistan NaN NaN NaN NaN NaN NaN NaN 5.7 6.8 6.4 6.6 6.8 7.3 7.0 7.6 7.6
Akrotiri and Dhekelia NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Albania 2.6 4.0 4.8 5.3 5.8 6.4 6.0 6.3 6.2 6.9 6.8 6.7 6.9 6.7 6.9 6.5
Algeria 4.2 3.8 4.1 4.1 3.9 3.5 3.8 3.9 3.7 3.4 3.1 3.1 3.5 3.7 4.6 4.2

In [7]:
# .loc selects by label, row first and then column: [row label, column label]
# the column label is passed as a string here, hence '2008' in quotes
data.loc['Canada', '2008']


Out[7]:
10.300000000000001

In [8]:
# you can also slice a dataframe by label
# unlike the string slices earlier, both end points are included:
# 'Denmark' and '2001' both appear in the result below
data.loc['Canada':'Denmark', '1999':'2001']


Out[8]:
1999 2000 2001
Total expenditure on health as percentage of GDP (gross domestic product)
Canada 8.9 8.8 9.3
Cape Verde 4.5 4.6 5.0
Cayman Islands NaN NaN NaN
Central African Republic 3.5 3.8 3.8
Chad 5.9 6.3 6.0
Channel Islands NaN NaN NaN
Chile 8.2 8.3 8.4
China 4.5 4.6 4.6
Christmas Island NaN NaN NaN
Cocos Island NaN NaN NaN
Colombia 9.3 7.3 7.3
Comoros 3.2 2.9 2.3
Congo, Dem. Rep. 4.3 4.9 4.4
Congo, Rep. 3.1 2.1 2.4
Cook Is 3.5 3.4 4.6
Costa Rica 6.2 6.5 7.1
Cote d'Ivoire 5.6 5.1 4.0
Croatia 7.3 7.8 7.2
Cuba 6.0 6.1 6.3
Cyprus 5.6 5.8 5.8
Czech Republic 6.3 6.3 6.4
Czechoslovakia NaN NaN NaN
Denmark 9.0 8.7 9.1

In [9]:
%matplotlib inline
import matplotlib.pyplot as plt

In [10]:
# the .plot() function will create a simple graph for you to quickly visualize your data
# all three countries are drawn on one shared set of axes, and the axes are
# labelled so that the figure can be understood on its own
fig, ax = plt.subplots()
for country in ['Denmark', 'Canada', 'India']:
    # each selected row is named after its country; that name is used in the legend
    data.loc[country].plot(ax=ax)
ax.set_title('Total expenditure on health')
ax.set_xlabel('Year')
ax.set_ylabel('Percentage of GDP')
ax.legend(loc='best')


Out[10]:
<matplotlib.legend.Legend at 0x7fb96f27f4a8>