In [1]:
##Some code to run at the beginning of the file, to be able to show images in the notebook
##Don't worry about this cell
#Print the plots in this screen
%matplotlib inline
#Be able to plot images saved in the hard drive
from IPython.display import Image
#Make the notebook wider
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
Python uses variables and code.
Variables tell the computer to save something (a number, a string, a spreadsheet) with a name.
For instance, if you write variable_name = 3, the computer knows that variable_name is 3.
Variables can represent:
They have methods = ways to edit the data structure. For example add, delete, find, sort... (= functions in excel)
In [2]:
##this is a list: values separated by commas, written inside square brackets
print([1,2,3])
In [2]:
#type() tells us which data structure a value is; for [1,2,3] it is a list
print(type([1,2,3]))
In [3]:
# The elements of a list do not need to share a type: here a float, a string and an integer
this_is_list1 = [3.5, "I'm another string", 4]
print(this_is_list1)
# A list can also hold other data structures, for instance another list as its last element
this_is_list2 = [3.5, "I'm another string", 4, this_is_list1]
print(this_is_list2)
OPERATIONS IN LISTS
In [1]:
## The list used as the starting point: three numbers and one string
this_is_a_list = [1, 3, 2, "b"]
print("Original: ", this_is_a_list)
Add element
In [2]:
## Add elements: .append() puts one element at the end of the list (the list itself is changed)
this_is_a_list.append("c")
print("Added c: ", this_is_a_list)
Retrieve element
CAREFUL: The first element has index 0, which means that this_is_a_list[0] gets the first element and this_is_a_list[1] gets the second element
In [11]:
## Get element. The first element has index 0, which means that this_is_a_list[0] gets the first element
## and this_is_a_list[3] gets the fourth element
print("Fourth element: ", this_is_a_list[3])
Get slices
In [7]:
#Display the whole list (the value of the last line of a cell is shown as its output)
this_is_a_list
Out[7]:
In [6]:
#Slice [start:stop]: the elements from index 1 up to, but not including, index 3
this_is_a_list[1:3]
Out[6]:
In [ ]:
In [4]:
# The whole list
this_is_a_list = [0, 1, 2, 3, 4]
print(this_is_a_list)
# Leaving out the stop index takes everything from the second element (index 1) to the last one, included
print("Second to end element: ", this_is_a_list[1:])
In [5]:
#Second to the fourth (included): the stop index 4 is left out, so we get indices 1, 2 and 3
print("Second to the fourth (included): ",this_is_a_list[1:4])
#First to the last element (not included): -1 means "the last element", and the stop index is left out
print("First to the last element (not included): ",this_is_a_list[:-1])
Remove element
In [6]:
print("Original: ", this_is_a_list)
## Remove 4th element (index 3) and save it as removed_element
## .pop(i) deletes the element at index i from the list and gives it back
removed_element = this_is_a_list.pop(3)
print(removed_element)
print("The list is now: ", this_is_a_list)
In [8]:
# Start again from a fresh list with the numbers 1 to 5
this_is_a_list = [1, 2, 3, 4, 5]
Search
In [9]:
#Search: "in" gives True when the value is one of the elements of the list, False otherwise
print(3 in this_is_a_list)
In [10]:
#Find index: .index(x) gives the position (counting from 0) of the first element equal to x
print(this_is_a_list.index(4))
Length
In [10]:
## Count the number of elements in a list
this_is_a_list = [1, 3, 2]
len_this_is_a_list = len(this_is_a_list) #you tell the computer to count the elements and save the result as `len_this_is_a_list`
print("Length: ", len_this_is_a_list)
Sort
In [17]:
## Sort a list
this_is_a_list = [1, 3, 2]
this_is_a_list = sorted(this_is_a_list) #sorted() returns a new, sorted list; we save it with the same name
print("Sorted: ", this_is_a_list)
Sum
In [16]:
## Sum a list
this_is_a_list = [1, 3, 2]
sum_this_is_a_list = sum(this_is_a_list) #you tell the computer to add up the elements and save the result as `sum_this_is_a_list`
print("Sum: ", sum_this_is_a_list)
In [6]:
#sum() starts from the number 0 and adds every element to it, so it cannot add strings:
#Python raises a TypeError. We catch the error and print it so that running the whole
#notebook ("Run All") does not stop at this cell.
try:
    sum(["1","2"])
except TypeError as error:
    print("TypeError:", error)
Notice that we wrote `this_is_a_list.pop()`, but `sum(this_is_a_list)` and `sorted(this_is_a_list)`. This is because `.pop()` only works on lists (`.pop()` is a method of the data structure List), while `sum()` and `sorted()` are functions that work with many different data structures.
Some standard functions:
In [11]:
#float() converts a string into a number with decimals: "1" becomes 1.0
print(float("1"))
`range(start, stop, step)` creates the numbers from `start` to `stop` (not including the number `stop`), jumping in steps of size `step`.
In [29]:
## Create a list [0,1,2,3,4]: start at 0, stop before 5, in steps of 1
print(list(range(0,5,1)))
print(range(5)) #range(5) is short for range(0,5,1); for most practical purposes you don't need to convert it to a list
In [12]:
## Create the list [1,2,3,4]: start at 1, stop before 5, in steps of 1
print(list(range(1,5,1)))
In [13]:
## Create the list [1,3,5,7,9]: start at 1, stop before 10, in steps of 2
print(list(range(1,10,2)))
In [4]:
#range() only accepts whole numbers; np.arange(start, stop, step) also allows a step with decimals, like 0.5
import numpy as np
np.arange(0,5,0.5)
Out[4]:
WORKING WITH STRINGS AND LISTS
In [278]:
#we create the string "eggs and bacon" and call it our_string
our_string = "eggs and bacon"
#now we divide it into words, creating a list and saving it with the name our_list_of_words
#(.split() without arguments cuts the string at the spaces)
our_list_of_words = our_string.split()
print(our_list_of_words)
In [279]:
#we can do the opposite and join the words using the string method "join":
#the string before .join (here one space) is placed between the words.
" ".join(our_list_of_words)
Out[279]:
In [280]:
#we can join the words using any characters we want in the quoted part of (" ".join)
":::".join(our_list_of_words)
Out[280]:
In [23]:
#we can also divide a string using other characters instead of space.
#for example let's split in the "a"s (the "a"s themselves are removed from the pieces)
our_string = "eggs and bacon"
print(our_string.split("a"))
In [15]:
#we can change parts of the string: .replace(old, new) gives back a new string
#where old is replaced by new (our_string itself is not modified)
our_string = "eggs and bacon"
print(our_string.replace("bacon","tofu"))
In [286]:
#we can find where a word starts. For example let's find how many characters there are before "and"
#.find() gives the index of the first time the word appears, even if it appears more than once
our_string = "eggs and bacon and"
print(our_string.find("and"))
In [16]:
#and we can use this to slice the string like we did with the list
our_string1 = "10 events"
our_string2 = "120 events"
our_string3 = "2 events"
#we find the index where "event" starts in each string
#(note: despite the name index_and, these variables hold the position of "event")
index_and1 = our_string1.find("event")
index_and2 = our_string2.find("event")
index_and3 = our_string3.find("event")
print(index_and1)
print(index_and2)
print(index_and3)
In [17]:
#print the full string, before slicing it
print(our_string1)
In [21]:
#and keep all the string until that index (the index itself is not included)
#the first line computes the index inside the slice; the other two use the saved variables
print(our_string1[:our_string1.find("event")])
print(our_string2[:index_and2])
print(our_string3[:index_and3])
ipython help
In [18]:
#In IPython/Jupyter, writing ? after a name shows help about that object
this_is_a_list?
In [20]:
#It also works for methods: this shows the help of the string method find
our_string1.find?
In [30]:
# A tuple is written with parentheses instead of square brackets
this_is_a_tuple = (1, 3, 2, "b")
print(this_is_a_tuple)
# list() creates a new list with the same elements as the tuple
this_is_a_list = list(this_is_a_tuple)
print(this_is_a_list)
In [33]:
#If we try to pop an element, like with lists, we get an error: tuples have no .pop() method.
#We catch the error and print it so that running the whole notebook ("Run All")
#does not stop at this cell.
try:
    this_is_a_tuple.pop(0)
except AttributeError as error:
    print("AttributeError:", error)
In [272]:
#Show the picture of the set operations (Venn diagrams) saved in the images folder
Image(filename='./images/set-operations-illustrated-with-venn-diagrams.png')
Out[272]:
In [12]:
#Difference of two sets: the elements of the first set that are not in the second one
{1,2,3} - {2,5,6}
Out[12]:
In [273]:
# Start from a list in which some elements appear more than once
this_is_a_list = [3, 1, 1, 1, 2, 3]
print(this_is_a_list)
# Converting the list to a set keeps only one copy of each element
this_is_a_set1 = set(this_is_a_list)
print(this_is_a_set1)
# A set can also be written directly, using curly braces
this_is_a_set2 = {1, 2, 4}
print(this_is_a_set2)
In [183]:
## Union: the elements that are in at least one of the two sets
print(this_is_a_set1 | this_is_a_set2)
In [184]:
## Intersection: the elements that are in both sets
print(this_is_a_set1 & this_is_a_set2)
In [185]:
## Difference set1 - set2: the elements of set1 that are not in set2
print(this_is_a_set1 - this_is_a_set2)
In [275]:
## Difference set2 - set1: the elements of set2 that are not in set1
print(this_is_a_set2 - this_is_a_set1)
In [276]:
## Very useful for words. Imagine we have two articles, one saying "eggs bacon python" and another saying "bacon spam"
#We can find which words they have in common: & keeps only the words that are in both sets
print({"eggs","bacon","python"} & {"bacon","spam"})
We won't use dicts today so we'll cover them on Thursday
In [14]:
#First we need to import it
import numpy as np
In [15]:
#Sum of a list
a_list = [0,1,2,3,4,5,6]
#Using the standard function
print(sum(a_list))
#Using numpy (it gives the same total)
print(np.sum(a_list))
In [16]:
##How to find the mean() of a list of numbers? mean() does not exist as a standard function,
##so Python raises a NameError. We catch the error and print it so that running the whole
##notebook ("Run All") does not stop at this cell.
try:
    print(mean(a_list))
except NameError as error:
    print("NameError:", error)
In [23]:
# Arithmetic on a numpy array is applied to every element: each number is raised to the power of 10
a_list = [0, 1, 2, 3, 4, 5, 6]
a_np_array = np.array(a_list)
a_np_array ** 10
Out[23]:
In [16]:
## numpy has the function that is missing: np.mean()
import numpy as np

a_list = [0, 1, 2, 3, 4, 5, 6]
# First convert the list to an array.
# np.mean(a_list) would also accept the list directly, but the conversion is required on some other occasions.
a_np_array = np.array(a_list)
print(type(a_list))
print(type(a_np_array))
print(np.mean(a_np_array))
In [17]:
##you can take powers: ** is applied to every element of the array
print(a_np_array**2) #this would not work with a list (a list does not support **)
In [257]:
##or square roots, also computed for every element
print(np.sqrt(a_np_array)) #this would work with a list too: np.sqrt also accepts a plain list
In [252]:
#or some basic statistics
import numpy as np
import scipy.stats #another library for more complicated statistics
#we create a list with numbers 0,1,2...998,999
numElements = 1000
this_is_an_list = list(range(numElements))
#and convert it to a numpy array
this_is_an_array = np.array(this_is_an_list)
print(this_is_an_array[:10]) #print only the 10 first elements to make sure it's okay
#and print some stats
print(np.mean(this_is_an_array)) #mean (average)
print(np.std(this_is_an_array)) #standard deviation
print(np.median(this_is_an_array)) #median
print(scipy.stats.mode(this_is_an_array)) #mode (the most frequent value)
print(scipy.stats.skew(this_is_an_array)) #skewness
In [24]:
# Starting point: an array with the numbers 1 to 10, in increasing order
this_is_an_array = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
print(this_is_an_array)
In [109]:
#If we want the elements greater or equal (>=) to 5, we could do:
#(this relies on knowing the position: the slice starts at index 4, the fifth element)
print(this_is_an_array[4:])
In [25]:
# That case was very easy, but what about getting the elements >= 5 in here:
# [5, 9, 4, 3, 2, 8, 6, 7, 10, 1]
unsorted_array = np.array([5, 9, 4, 3, 2, 8, 6, 7, 10, 1])
print(unsorted_array)
In [26]:
#Comparing an array with a number compares every element, giving an array of True and False
unsorted_array >= 5
Out[26]:
In [30]:
#Using an array of True/False as index keeps the elements at the positions that are True
unsorted_array[np.array([ True, True, False, False, False, True, True, True, True, False])]
Out[30]:
In [261]:
#We can do the following: unsorted_array[unsorted_array >= 5]
#which means keep the elements of unsorted_array that are larger or equal to 5
#The general form is unsorted_array[condition]
print(unsorted_array[unsorted_array >= 5])
In [116]:
#This is a special kind of slicing where you filter elements with a condition.
#Lists do not allow you to do this
#How does it work? By creating another array, of the same length, with False and Trues
print(unsorted_array)
#The condition has to be computed on the same array that we are filtering (unsorted_array),
#so that every True/False lines up with the element printed above it.
print(unsorted_array >= 5) #the same than comparing 3 >= 5, but numpy compares every number inside the array.
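`unsorted_array[condition]` keeps the elements of `unsorted_array` at the positions where `condition` is True. `condition` is an array of True/False values with the same length as `unsorted_array`.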
In [120]:
#We can use variables: save the True/False array with a name and use it as the index.
#The condition must come from the array we are filtering (unsorted_array); a condition
#computed on a different array would select positions that have nothing to do with its values.
condition = unsorted_array >= 5 #the computer does the = at the end always
print(unsorted_array[condition])
In [125]:
#What if we want to get the numbers between 5 and 9, both excluded (6, 7 and 8)?
unsorted_array = np.array([ 5, 9, 4, 3, 2, 8, 6, 7, 10, 1])
#We do it with two steps
#Step 1: Greater than 5
condition_gt5 = unsorted_array > 5
unsorted_array_gt5 = unsorted_array[condition_gt5]
print("Greater than 5", unsorted_array_gt5)
#Step 2: Lower than 9
condition_lw9 = unsorted_array_gt5 < 9 #we are using the new array, so this condition has the length of the filtered array
unsorted_array_gt5_lw9 = unsorted_array_gt5[condition_lw9]
print("Greater than 5 and lower than 9", unsorted_array_gt5_lw9)
In [25]:
# The same filter in one step: both conditions are computed on the full array and
# combined with &, which is True only at the positions where both conditions are True
unsorted_array = np.array([5, 9, 4, 3, 2, 8, 6, 7, 10, 1])
condition_gt5 = unsorted_array > 5
condition_lw9 = unsorted_array < 9
print(unsorted_array[condition_gt5 & condition_lw9])
We'll explore it through examples
In [31]:
##first we import it
import pandas as pd
In [19]:
#Image can also show a picture directly from a web address (url=) instead of a file
Image(url="http://www.relatably.com/m/img/boring-memes/when-my-friend-tell-me-a-boring-story_o_1588863.jpg")
Out[19]: