Resources

For further information and tutorials see:

Software Carpentry Python Programming

Variables



In [2]:

    
# the = symbol indicates that what is on the right is assigned to the variable name on the left
name = 'Lina'  # text wrapped in quotes is a string
year = 2016  # a whole number written without quotes is an integer



In [3]:

    
# we can then see what our variable is holding using the print() function
print(name)
# we can check the type of our variable using the type(variable_name) function
# (the result of type() is passed to print() here so that it is displayed)
print(type(year))









    



Lina
<class 'int'>



In [4]:

    
# you must assign a variable before you use it, otherwise a NameError will occur
print(age)









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-27506fe65432> in <module>()
      1 # you must assign a variable before you call it, otherwise an error will occur
----> 2 print(age)

NameError: name 'age' is not defined

Lists



In [6]:

    
# a list is written with square brackets, with its items separated by commas
fruits = ['apple', 'banana', 'mango', 'lychee']



In [7]:

    
# print() can display a whole list at once
print(fruits)









    



['apple', 'banana', 'mango', 'lychee']



In [10]:

    
# .append() adds one item to the end of the list, changing the list in place
# because the list itself is changed, running this cell again adds another 'orange'
fruits.append('orange')



In [11]:

    
# appended items show up at the end of the list
print(fruits)









    



['apple', 'banana', 'mango', 'lychee', 'orange', 'orange', 'orange']



In [13]:

    
# the items in a list do not all need to be the same type
# here the list holds an integer, a string and another list (fruits)
misc = [29, 'dog', fruits]



In [14]:

    
# the inner list is printed inside its own pair of square brackets
print(misc)









    



[29, 'dog', ['apple', 'banana', 'mango', 'lychee', 'orange', 'orange', 'orange']]

Indexing and Slicing



In [15]:

    
# indexing in Python starts at 0, so index 0 is the first element
print(fruits[0])









    



apple



In [ ]:

    
# index 1 is the second element, because counting starts at 0
print(fruits[1])



In [16]:

    
# a string to use in the indexing and slicing examples below
s = 'This is a string.'



In [17]:

    
# strings can be indexed just like lists: index 0 is the first character
print(s[0])



In [18]:

    
# use -1 to get the last element (negative indices count back from the end)
print(fruits[-1])









    



orange



In [19]:

    
# -2 is the second-to-last element
print(fruits[-2])









    



orange



In [20]:

    
# to get a slice of the string use the : symbol, as in [start:stop]
# the slice begins at index start and ends just before index stop
print(s[0:4])









    



This



In [21]:

    
# leaving out the start index begins the slice at the start of the string
print(s[:4])









    



This



In [22]:

    
# this slice covers indices 4, 5 and 6: a space followed by 'is'
print(s[4:7])

is



In [23]:

    
# leaving out the stop index runs the slice to the end of the string
print(s[7:])
# using len(s) as the stop index gives the same result
print(s[7:len(s)])









    



 a string.
 a string.

If Statements



In [24]:

    
s2 = [19034, 23]

# Every chain begins with an 'if' line
# The 'elif' and 'else' branches are optional
# Any number of 'elif' branches may follow the 'if'
# Conditions are tested from top to bottom and only the first one that is True runs;
# the 'else' branch runs when none of them are True

if type(s2) is str:
    print('s2 is a string')
elif type(s2) is int:
    print('s2 is an integer')
elif type(s2) is float:
    print('s2 is a float')
else:
    print('s2 is not a string or integer')









    



s2 is not a string or integer

For Loops



In [25]:

    
# a list of whole numbers to loop over
nums = [23, 56, 1, 10, 15, 0]



In [26]:

    
# 'number' is the loop variable: the for loop gives it each element of nums in turn
# it does not have to be created before the loop starts

for number in nums:
    # % gives the remainder of a division; a remainder of 0 after dividing by 2 means even
    if number % 2 == 0:
        print('even')
    else:
        print('odd')









    



odd
even
odd
even
odd
even



In [27]:

    
# for loops can iterate over strings as well
# each pass through the loop takes the next character of the string
vowels = 'aeiou'
for vowel in vowels:
    print(vowel)









    



a
e
i
o
u

Functions



In [30]:

    
# always use descriptive naming for functions, variables, arguments etc.
def sum_of_squares(num1, num2):
    """Return the square of num1 added to the square of num2."""
    first_square = num1 ** 2
    second_square = num2 ** 2
    return first_square + second_square



In [31]:

    
# call the function by its name, passing one value for each argument
print(sum_of_squares(4,2))



In [32]:

    
# the return statement hands the function's result back to the caller,
# so the output of a function call can be stored in a variable for later use
ss1 = sum_of_squares(5,5)



In [33]:

    
# the stored result can be used like any other variable
print(ss1)

Useful Packages



In [34]:

    
# use a package by importing it, you can also give it a shorter alias, in this case 'np'
import numpy as np



In [38]:

    
# np.arange(15) creates a numpy array holding the integers 0 through 14
array = np.arange(15)
# list(range(15)) builds a plain Python list with the same values
lst = list(range(15))



In [39]:

    
# the two print slightly differently: the array is shown without commas
print(array)
print(lst)









    



[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]



In [40]:

    
# although they hold the same values, they are different types
print(type(array))
print(type(lst))









    



<class 'numpy.ndarray'>
<class 'list'>



In [41]:

    
# numpy arrays allow for vectorized calculations
# multiplying the array by 2 doubles every element
print(array*2)
# multiplying a list by 2 repeats the whole list instead
print(lst*2)









    



[ 0  2  4  6  8 10 12 14 16 18 20 22 24 26 28]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]



In [42]:

    
# rearrange the 15 values into 5 rows and 3 columns
array = array.reshape([5,3])
print(array)









    



[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]



In [43]:

    
# we can get the mean of each row (axis=1 averages across the columns)
array.mean(axis=1)









    Out[43]:





array([  1.,   4.,   7.,  10.,  13.])



In [44]:

    
# max value in each column (axis=0 compares down the rows)
array.max(axis=0)









    Out[44]:





array([12, 13, 14])



In [4]:

    
import pandas as pd



In [5]:

    
# this will read in a csv file into a pandas DataFrame
# this csv has data of country spending on healthcare
# header=0 uses the first line of the file as the column names
# index_col=0 uses the first column (the country names) as the row labels
# encoding tells pandas which character encoding the file's text uses
data = pd.read_csv('health.csv', header=0, index_col=0, encoding="ISO-8859-1")



In [6]:

    
# the .head() method shows the first rows of the DataFrame (five by default)
data.head()









    Out[6]:






  
    
      
      1995
      1996
      1997
      1998
      1999
      2000
      2001
      2002
      2003
      2004
      2005
      2006
      2007
      2008
      2009
      2010
    
    
      Total expenditure on health as percentage of GDP (gross domestic product)
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Abkhazia
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      Afghanistan
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      5.7
      6.8
      6.4
      6.6
      6.8
      7.3
      7.0
      7.6
      7.6
    
    
      Akrotiri and Dhekelia
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      Albania
      2.6
      4.0
      4.8
      5.3
      5.8
      6.4
      6.0
      6.3
      6.2
      6.9
      6.8
      6.7
      6.9
      6.7
      6.9
      6.5
    
    
      Algeria
      4.2
      3.8
      4.1
      4.1
      3.9
      3.5
      3.8
      3.9
      3.7
      3.4
      3.1
      3.1
      3.5
      3.7
      4.6
      4.2



In [7]:

    
# .loc selects by label; rows are indicated first, followed by the column: [row, column]
# the year is written in quotes because the column labels are strings
data.loc['Canada', '2008']









    Out[7]:





10.300000000000001



In [8]:

    
# you can also slice a dataframe
# unlike list slicing, a .loc label slice includes both of its end points
data.loc['Canada':'Denmark', '1999':'2001']









    Out[8]:






  
    
      
      1999
      2000
      2001
    
    
      Total expenditure on health as percentage of GDP (gross domestic product)
      
      
      
    
  
  
    
      Canada
      8.9
      8.8
      9.3
    
    
      Cape Verde
      4.5
      4.6
      5.0
    
    
      Cayman Islands
      NaN
      NaN
      NaN
    
    
      Central African Republic
      3.5
      3.8
      3.8
    
    
      Chad
      5.9
      6.3
      6.0
    
    
      Channel Islands
      NaN
      NaN
      NaN
    
    
      Chile
      8.2
      8.3
      8.4
    
    
      China
      4.5
      4.6
      4.6
    
    
      Christmas Island
      NaN
      NaN
      NaN
    
    
      Cocos Island
      NaN
      NaN
      NaN
    
    
      Colombia
      9.3
      7.3
      7.3
    
    
      Comoros
      3.2
      2.9
      2.3
    
    
      Congo, Dem. Rep.
      4.3
      4.9
      4.4
    
    
      Congo, Rep.
      3.1
      2.1
      2.4
    
    
      Cook Is
      3.5
      3.4
      4.6
    
    
      Costa Rica
      6.2
      6.5
      7.1
    
    
      Cote d'Ivoire
      5.6
      5.1
      4.0
    
    
      Croatia
      7.3
      7.8
      7.2
    
    
      Cuba
      6.0
      6.1
      6.3
    
    
      Cyprus
      5.6
      5.8
      5.8
    
    
      Czech Republic
      6.3
      6.3
      6.4
    
    
      Czechoslovakia
      NaN
      NaN
      NaN
    
    
      Denmark
      9.0
      8.7
      9.1



In [9]:

    
%matplotlib inline
import matplotlib.pyplot as plt



In [10]:

    
# the .plot() function will create a simple graph for you to quickly visualize your data
# drawing every country on one explicit set of axes lets us label the chart once
fig, ax = plt.subplots()
for country in ['Denmark', 'Canada', 'India']:
    # each selected row carries its country as its name, which becomes the legend entry
    data.loc[country].plot(ax=ax)
ax.set_xlabel('Year')
ax.set_ylabel('Total expenditure on health (% of GDP)')
# the legend call stays on the last line, so the cell output is still the Legend object
ax.legend(loc='best')









    Out[10]:





<matplotlib.legend.Legend at 0x7fb96f27f4a8>

	1995	1996	1997	1998	1999	2000	2001	2002	2003	2004	2005	2006	2007	2008	2009	2010
Total expenditure on health as percentage of GDP (gross domestic product)
Abkhazia	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
Afghanistan	NaN	NaN	NaN	NaN	NaN	NaN	NaN	5.7	6.8	6.4	6.6	6.8	7.3	7.0	7.6	7.6
Akrotiri and Dhekelia	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
Albania	2.6	4.0	4.8	5.3	5.8	6.4	6.0	6.3	6.2	6.9	6.8	6.7	6.9	6.7	6.9	6.5
Algeria	4.2	3.8	4.1	4.1	3.9	3.5	3.8	3.9	3.7	3.4	3.1	3.1	3.5	3.7	4.6	4.2

	1999	2000	2001
Total expenditure on health as percentage of GDP (gross domestic product)
Canada	8.9	8.8	9.3
Cape Verde	4.5	4.6	5.0
Cayman Islands	NaN	NaN	NaN
Central African Republic	3.5	3.8	3.8
Chad	5.9	6.3	6.0
Channel Islands	NaN	NaN	NaN
Chile	8.2	8.3	8.4
China	4.5	4.6	4.6
Christmas Island	NaN	NaN	NaN
Cocos Island	NaN	NaN	NaN
Colombia	9.3	7.3	7.3
Comoros	3.2	2.9	2.3
Congo, Dem. Rep.	4.3	4.9	4.4
Congo, Rep.	3.1	2.1	2.4
Cook Is	3.5	3.4	4.6
Costa Rica	6.2	6.5	7.1
Cote d'Ivoire	5.6	5.1	4.0
Croatia	7.3	7.8	7.2
Cuba	6.0	6.1	6.3
Cyprus	5.6	5.8	5.8
Czech Republic	6.3	6.3	6.4
Czechoslovakia	NaN	NaN	NaN
Denmark	9.0	8.7	9.1