In [ ]:
# you can type math directly into Python (IPython, command line)
2+2
In [ ]:
4**2 # this is "4 to the power of 2"
In [ ]:
1./2 # converts 1 to a float
In [ ]:
4*3; # The semicolon suppresses the output in IPython - this is similar to Matlab or Octave
In [ ]:
# the = symbol indicates that what is on the right is assigned to the variable name on the left
name = 'Madeleine'
year = 2017
In [ ]:
# we can then see what our variable is holding using the print() function
print(name)
# we can check the type of our variable using the type(variable_name) function
print(type(year))
In [ ]:
# you can pull up help information on any object or function:
help(type)
In [ ]:
# you must assign a variable before you use it, otherwise a NameError will occur
print(age)
In [ ]:
fruits = ['apple', 'banana', 'mango', 'lychee']
In [ ]:
print(fruits)
In [ ]:
fruits.append('orange')
print(fruits)
In [ ]:
# the elements of a list don't all need to be the same type
misc = [29, 'dog', fruits]
In [ ]:
print(misc)
In [ ]:
print(fruits + fruits)
The second is called a "tuple", which is like an immutable list: nothing can be added or removed, and its elements can't be reassigned.
In [ ]:
tup1 = (1,2)
print(tup1)
In [ ]:
tup1[0] = 2 # this gives an error
In [ ]:
# indexing in Python starts at 0, not at 1 as it does in Matlab or Octave
print(fruits[0])
In [ ]:
print(fruits[1])
In [ ]:
# strings are sequences too, so they can be indexed and sliced like lists (but, unlike lists, they are immutable)
s = 'This is a string.'
In [ ]:
print(s[0])
In [ ]:
# use -1 to get the last element
print(fruits[-1])
In [ ]:
print(fruits[-2])
In [ ]:
# to get a slice of the string use the : symbol
print(s[0:4])
In [ ]:
print(s[:4])
In [ ]:
print(s[4:7])
In [ ]:
print(s[7:])
print(s[7:len(s)])
In [ ]:
s2 = [19034, 23]
# You will always need to start with an 'if' line
# You do not need the elif or else statements
# You can have as many elif statements as needed
# isinstance() is the idiomatic type check; it also accepts subclasses
if isinstance(s2, str):
    print('s2 is a string')
elif isinstance(s2, int):
    print('s2 is an integer')
elif isinstance(s2, float):
    print('s2 is a float')
else:
    # reached only when none of the three checks above matched
    print('s2 is not a string, integer, or float')
In [ ]:
nums = [23, 56, 1, 10, 15, 0]
In [ ]:
# in this case, 'n' is a dummy variable that will be used by the for loop
# you do not need to assign it ahead of time
for n in nums:
    # n % 2 is the remainder after dividing by 2: 0 for even numbers
    if n % 2 == 0:
        print('even')
    else:
        print('odd')
In [ ]:
# for loops can iterate over strings as well
# each pass through the loop binds 'vowel' to the next character
vowels = 'aeiou'
for vowel in vowels:
    print(vowel)
In [ ]:
# always use descriptive naming for functions, variables, arguments etc.
def sum_of_squares(num1, num2):
    """Return the sum of the squares of two numbers.

    Input: two numbers
    Output: the sum of the squares of the two numbers
    """
    return num1**2 + num2**2
# The stuff inside """ """ is called the "docstring". It can be accessed by typing help(sum_of_squares)
In [ ]:
print(sum_of_squares(4,2))
In [ ]:
# the return statement in a function allows us to store the output of a function call in a variable for later use
ss1 = sum_of_squares(5,5)
In [ ]:
print(ss1)
In [ ]:
# use a package by importing it, you can also give it a shorter alias, in this case 'np'
import numpy as np
In [ ]:
array = np.arange(15)
lst = list(range(15))
In [ ]:
print(array)
print(lst)
In [ ]:
print(type(array))
print(type(lst))
In [ ]:
# numpy arrays allow for vectorized calculations
print(array*2)
print(lst*2)
In [ ]:
array = array.reshape([5,3])
print(array)
In [ ]:
# we can get the mean of each row (axis=1 averages across the columns)
array.mean(axis=1)
In [ ]:
# max value in each column
array.max(axis=0)
In [ ]:
import pandas as pd
In [ ]:
# this will read in a csv file into a pandas DataFrame
# this csv has data of country spending on healthcare
data = pd.read_csv('health.csv', header=0, index_col=0, encoding="ISO-8859-1")
In [ ]:
# the .head() function will allow us to look at first few lines of the dataframe
data.head()
In [ ]:
# by default, rows are indicated first, followed by the column: [row, column]
data.loc['Canada', '2008']
In [ ]:
# you can also slice a dataframe
data.loc['Canada':'Denmark', '1999':'2001']
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
In [ ]:
# the .plot() function will create a simple graph for you to quickly visualize your data
data.loc['Denmark'].plot()
data.loc['Canada'].plot()
data.loc['India'].plot()
plt.legend(loc='best')