In [1]:
print('Hello World!')
In [2]:
365 * 24 * 60 * 60 # how many seconds are in a year
Out[2]:
In [3]:
_ / 1e6 # underscore refers to the output of the previous cell
Out[3]:
In [4]:
x = 4 + 3
print(x)
This is a LaTeX equation:
$\int_0^\infty x^{-\alpha}\,dx$
In [5]:
%matplotlib inline
from matplotlib.pyplot import plot
plot([0, 1, 0, 1])
Out[5]:
This is a plot using matplotlib of a vector.
If we save and close the browser tab, the kernel running the notebook will continue to run. To stop it, we can use 'File -> Close and Halt'. We can also download the notebook in a number of formats (HTML, Markdown, PDF, etc.).
In [6]:
# List the files in the current directory:
!ls
In [7]:
# Declare filename:
filename = './shakespeare.txt'
# IPython substitutes the Python variable `filename` into the shell command
!echo $filename
print(filename)
In [8]:
# head:
!head -n 3 $filename
In [9]:
# tail:
!tail -n 10 $filename
In [10]:
# wc:
!wc $filename
In [11]:
!wc -l $filename
In [12]:
# cat:
!cat $filename | wc -l
In [13]:
# grep:
!grep -i 'parchment' $filename
In [14]:
# output each match of the pattern on its own line (-o), then count the number of lines
!cat $filename | grep -o 'liberty' | wc -l
In [15]:
# sed:
# replace all instances of 'parchment' to 'manuscript'
!sed -e 's/parchment/manuscript/g' $filename > temp.txt
In [16]:
# sort:
!head -n 5 $filename
In [17]:
!head -n 5 $filename | sort
In [18]:
# columns separated by ' ' (-t' '), sort key starts at column 2 (-k2), case insensitive (-f)
!head -n 5 $filename | sort -f -t' ' -k2
In [19]:
!sort $filename | wc -l
In [20]:
# uniq -u keeps only the lines that occur exactly once in the sorted input
# (lines that are repeated are dropped entirely); wc -l then counts them
!sort $filename | uniq -u | wc -l
In [21]:
# Count the most frequent words in the text in Unix:
#   sed      - replace every space with a newline, giving one token per line
#   sort     - group identical tokens on adjacent lines
#   uniq -c  - collapse each group into "<count> <token>"
#   sort -nr - order numerically by count, largest first
#   head -15 - keep the 15 most frequent tokens
!sed -e 's/ /\'$'\n/g' < $filename | sort | uniq -c | sort -nr | head -15
In [22]:
!sed -e 's/ /\'$'\n/g' < $filename | sort | uniq -c | sort -nr | head -15 > count_vs_words
In [23]:
!cat count_vs_words
In [24]:
# plot by importing words counts into Python
%matplotlib inline
import matplotlib.pyplot as plt
import csv
# Parse the `uniq -c` output saved in count_vs_words: every line has the form
# "<padding spaces><count> <word>". Plain string splitting is used so that
# quote characters inside a word are kept literally.
xTicks = []
y = []
with open('count_vs_words', 'r') as counts_file:
    for line in counts_file:
        count, separator, word = line.rstrip('\n').lstrip(' ').partition(' ')
        # The entry that counts empty tokens (produced by runs of spaces) has
        # no word. Skip it wherever it is ranked, instead of assuming it is
        # the first line and dropping line 1 unconditionally.
        if not word:
            continue
        y.append(int(count))
        xTicks.append(word)
# plot:
x = range(len(y))
plt.figure(figsize = (10, 10))
plt.xticks(x, xTicks, rotation = 90)
plt.xlabel('word')
plt.ylabel('count')
plt.plot(x, y, '*')
Out[24]:
Key-features:
Multi-dimensional arrays, built-in array operations, broadcasting (simplified interactions), integration (C, C++).
Why NumPy:
Many packages are built on top of NumPy!
Rank 1 ndarray:
In [25]:
import numpy as np
In [26]:
an_array = np.array([3, 33, 333]) # create a rank 1 array
print(type(an_array)) # the type of a ndarray
In [27]:
# test the shape of the array:
an_array.shape
Out[27]:
In [28]:
# because it's rank 1, we need only one index to access each element:
print(an_array[0], an_array[1], an_array[2])
In [29]:
# arrays are mutable:
an_array[0] = 88
an_array
Out[29]:
In [31]:
# you can't assign a non-numeric string to an integer array:
# the assignment raises ValueError, which is caught and shown here so that
# the notebook still runs from top to bottom
try:
    an_array[0] = 'Spam'
except ValueError as error:
    print('ValueError:', error)
Rank 2 ndarray
In [32]:
another = np.array([[11, 12, 13], [21, 22, 23]]) # rank 2 ndarray
print(another)
print('The shape is:', another.shape)
print('Indexing the elements: ', another[0, 0], another[0, 1], another[1, 0])
There are many ways to create a ndarray
In [33]:
# a 2x2 ndarray filled with zeros
ex1 = np.zeros((2, 2))
ex1
Out[33]:
In [34]:
# a 2x2 ndarray filled with 9.0
ex2 = np.full((2, 2), 9.0)
ex2
Out[34]:
In [35]:
# a 2x2 ndarray filled with a diagonal of 1s and other 0s:
ex3 = np.eye(2, 2)
ex3
Out[35]:
In [36]:
# a ndarray of 1s:
ex4 = np.ones((1, 2))
ex4
Out[36]:
In [37]:
# the above array is rank 2:
print(ex4.shape)
#which means we need two indexes to access an element:
print()
print(ex4[0, 1])
In [38]:
# ndarray of random float between 0 and 1:
ex5 = np.random.random((2, 2))
ex5
Out[38]:
In [39]:
import numpy as np
In [40]:
# rank 2 ndarray of shape (3, 4):
an_array = np.array([[11, 12, 13, 14],
[21, 22, 23, 24],
[31, 32, 33, 34]])
an_array
Out[40]:
In [41]:
# slice the first 2 rows (indices 0 and 1) and the columns with indices 1 and 2:
# first value is inclusive, second value exclusive
a_slice = an_array[:2, 1:3]
a_slice
Out[41]:
'a_slice' has its own indices (different from those of 'an_array'), but it is a view, not a copy. When you modify 'a_slice', you also modify 'an_array', because both share the same underlying data:
In [42]:
print('Before:', an_array[0, 1])
a_slice[0, 0] = 1000
print('After:', an_array[0, 1])
You can make a copy of another array, so you have two ndarrays pointing to two different objects:
In [43]:
an_array2 = np.array([
    [11, 12, 13, 14],
    [21, 22, 23, 24],
    [31, 32, 33, 34],
])
# Taking an explicit copy of the slice allocates new storage, so changes made
# to a_slice2 never reach an_array2.
a_slice2 = an_array2[:2, 1:3].copy()
print('Before:', an_array2[0, 1])
a_slice2[0, 0] = 1000
print('After:', an_array2[0, 1])
Use both integer indexing and slice indexing
In [44]:
an_array2 = np.array([[11, 12, 13, 14],
                      [21, 22, 23, 24],
                      [31, 32, 33, 34]])
# Index the array defined in this cell (the original indexed `an_array`,
# leaving `an_array2` unused).
# An integer index for the row plus a slice for the columns drops one
# dimension, so the result is a rank 1 view.
row_rank1 = an_array2[1, :] # only 1 row and all columns (:)
print(row_rank1, row_rank1.shape) # notice only a single []
In [45]:
# slicing alone: generates an array of the same rank as the an_array:
row_rank2 = an_array[1:2, :] # rank 2 view
print(row_rank2, row_rank2.shape) # notice the [[]]
Array indexing for changing elements:
In [46]:
# create a new array:
an_array = np.array([[11, 12, 13, 14],
[21, 22, 23, 24],
[31, 32, 33, 34],
[41, 42, 43, 44]])
print('Original array:')
an_array
Out[46]:
In [47]:
# build the two index arrays first: one column index per row, and the row
# indices 0..3
col_indices = np.array([0, 1, 2, 0])
row_indices = np.arange(4)
# then report them, columns before rows
print('\nCol indices picked:', col_indices)
print('\nRow indices picked:', row_indices)
In [48]:
# examine the pairings of row_indices and col_indices:
for row, col in zip(row_indices, col_indices):
print(row, ', ', col)
In [49]:
# select one element from each row:
print('Values in the an_array at those indices:\n', an_array[row_indices, col_indices])
In [50]:
# change one element from each row using the indices selected:
an_array[row_indices, col_indices] += 1000
print('Changed array:\n', an_array)
In [51]:
# create a 3x2 ndarray:
an_array = np.array([[11, 12], [21, 22], [31, 32]])
an_array
Out[51]:
In [52]:
# create a filter which will be a boolean:
filter1 = (an_array > 15)
filter1
Out[52]:
Notice the filter has the same shape as the original ndarray, but filled with True and False according to the result of the boolean logic.
In [53]:
# we can now select just those elements which meet that criteria
an_array[filter1]
Out[53]:
In [54]:
# for short, we could directly used this approach without creating a new object:
an_array[an_array > 15]
Out[54]:
In [55]:
# greater than 20 and less than 30:
an_array[(an_array > 20) & (an_array < 30)]
Out[55]:
In [56]:
# only even values:
an_array[an_array % 2 == 0]
Out[56]:
We can actually change the values based on some filter:
In [57]:
an_array[an_array % 2 == 0] += 100
an_array
Out[57]:
In [58]:
import numpy as np
In [59]:
ex1 = np.array([11, 22]) # dtype is inferred as int64
ex1.dtype
Out[59]:
In [60]:
ex2 = np.array([11.0, 12.0]) # dtype is inferred as float64
ex2.dtype
Out[60]:
In [61]:
ex3 = np.array([11, 21], dtype = np.int64) # "force" the dtype as int64
ex3.dtype
Out[61]:
In [62]:
ex5 = np.array([11, 21], dtype = np.float64) # "force" the dtype as float64
print(ex5.dtype)
print()
print(ex5)
Arithmetic Array Operations
In [63]:
# np.int was only an alias for the builtin int and was removed in NumPy 1.24;
# passing the builtin int selects the same default integer dtype.
x = np.array([[111, 112], [121, 122]], dtype = int)
y = np.array([[211.1, 212.1], [221.1, 222.1]], dtype = np.float64)
print(x, '\n\n', y)
In [64]:
# add
print(x + y, '\n\n', np.add(x, y))
In [65]:
# subtract
print(x - y, '\n\n', np.subtract(x, y))
In [66]:
# multiply
print(x * y, '\n\n', np.multiply(x, y))
In [67]:
# divide
print(x / y, '\n\n', np.divide(x, y))
In [68]:
# square root
np.sqrt(x)
Out[68]:
In [69]:
# exponent (e ** x)
np.exp(x)
Out[69]:
In [70]:
# setup a random 2x5 matrix (standard normal samples scaled by 10)
arr = 10 * np.random.randn(2, 5)
print(arr)
In [71]:
# mean for all elements
arr.mean()
Out[71]:
In [72]:
# mean by row
arr.mean(axis = 1) # axis = 1 means calculate by row (axis = 0 is for column)
Out[72]:
In [73]:
# mean by column
arr.mean(axis = 0)
Out[73]:
In [74]:
# sum all elements
arr.sum()
Out[74]:
In [75]:
# median by column
np.median(arr, axis = 0)
Out[75]:
Sorting
In [76]:
# create a 10 element array of randoms
unsorted = np.random.randn(10)
unsorted
Out[76]:
In [77]:
# create a copy of the unsorted array and sort it:
sorted1 = np.array(unsorted)
sorted1.sort()
print(sorted1, '\n\n', unsorted)
Finding Unique Elements
In [78]:
array = np.array([1, 2, 1, 4, 2, 1, 4, 2])
np.unique(array)
Out[78]:
Set Operations with np.array data type
In [79]:
s1 = np.array(['desk', 'chair', 'bulb'])
s2 = np.array(['lamp', 'bulb', 'chair'])
print(s1, s2)
In [80]:
np.intersect1d(s1, s2)
Out[80]:
In [81]:
np.union1d(s1, s2)
Out[81]:
In [82]:
# elements in s1 that are not in s2
np.setdiff1d(s1, s2)
Out[82]:
In [83]:
# for each element of s1, test whether it is also in s2
# (np.isin replaces np.in1d, which is deprecated in NumPy 2.0)
np.isin(s1, s2)
Out[83]:
In [84]:
start = np.zeros((4, 3))
start
Out[84]:
In [85]:
# create a rank 1 ndarray with 3 values
add_rows = np.array([1, 0, 2])
add_rows
Out[85]:
In [86]:
# row broadcasting
start + add_rows
Out[86]:
In [87]:
# column broadcasting
add_cols = np.array([[0, 1, 2, 3]]).T
print(add_cols, '\n\n', add_cols + start)
In [88]:
# broadcasting in both dimensions:
add_scalar = np.array([1])
print(start + add_scalar)
In [89]:
from numpy import arange
from timeit import Timer
size = 1000000
timeits = 1000
In [90]:
# create the ndarray with values 0, 1, 2, ..., size-1
nd_array = arange(size)
print(type(nd_array))
In [91]:
# timer expects the operations as a parameter,
# here we pass nd_array.sum()
%time
nd_array.sum()
Out[91]:
In [92]:
# create the list with values 0, 1, 2, ..., size-1
a_list = list(range(size))
print(type(a_list))
In [93]:
%time
sum(a_list)
Out[93]:
WIFIRE is an integrated system for wildfire analysis which is capable of handling changing urban dynamics and climate. The website http://www.landfire.gov/ provides data on the Earth's surface. Each image is formed by many pixels; a pixel is a small square of a single, uniform color. We can represent a pixel's color by three 8-bit numbers (from 0 to 255) representing the Red, Green and Blue intensities. For example, Black is (0, 0, 0), White is (255, 255, 255), Red is (255, 0, 0), Green is (0, 255, 0), and Blue is (0, 0, 255).
In [94]:
%matplotlib inline
import numpy as np
from scipy import misc
import matplotlib.pyplot as plt
In [95]:
from skimage import data
# scipy.misc.imread was removed in SciPy 1.2; matplotlib's imread (already
# imported as plt) reads the JPEG instead. np.array(...) takes a writable
# copy, because later cells modify the pixels in place.
# NOTE(review): absolute, machine-specific path -- point it at your local copy.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
type(photo_data)
Out[95]:
In [96]:
plt.figure(figsize = (15, 15))
plt.imshow(photo_data)
Out[96]:
In [97]:
photo_data.shape
Out[97]:
The first two numbers are the height (number of rows) and the width (number of columns) in pixels, and the third number (i.e. 3) is for the three layers: Red, Green and Blue. The Red layer indicates Altitude, the Blue layer indicates Aspect and the Green layer indicates Slope. The higher the intensity of the color, the higher the altitude, aspect and slope.
In [98]:
photo_data.size
Out[98]:
In [99]:
photo_data.min(), photo_data.max()
Out[99]:
In [100]:
photo_data.mean()
Out[100]:
In [101]:
# pixel at row index 150 and column index 250 (indices are zero-based)
photo_data[150, 250]
Out[101]:
In [102]:
# only the green values
photo_data[150, 250, 1]
Out[102]:
In [103]:
# set a pixel to all zeros and show image (doesn't change much)
photo_data[150, 250] = 0
plt.figure(figsize = (10, 10))
plt.imshow(photo_data)
Out[103]:
In [104]:
# set the green layer for rows 200 to 800 to full intensity
photo_data[200:800, :, 1] = 255
plt.figure(figsize = (10, 10))
plt.imshow(photo_data)
Out[104]:
In [105]:
# set all layers for rows 200 to 799 to full intensity (white)
# scipy.misc.imread was removed in SciPy 1.2; read with matplotlib instead and
# take a writable copy because the pixels are modified below.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
photo_data[200:800, :] = 255
plt.figure(figsize = (10, 10))
plt.imshow(photo_data)
Out[105]:
In [106]:
# pick all pixels with low values
# scipy.misc.imread was removed in SciPy 1.2; read with matplotlib instead and
# take a writable copy because a later cell zeroes the filtered values in place.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
print('Original shape:', photo_data.shape)
# boolean array with the same shape as photo_data: True where a value is below 200
low_value_filter = photo_data < 200
print('Low value filter shape:', low_value_filter.shape)
In [107]:
# filtering out low values
plt.figure(figsize = (10, 10))
plt.imshow(photo_data)
photo_data[low_value_filter] = 0
plt.figure(figsize = (10, 10))
plt.imshow(photo_data)
Out[107]:
In [108]:
# more row and column operations
# Indices along the main diagonal. They are limited to the shorter image side
# so that the paired (row, column) lookup in the next cell stays in bounds
# when the image has fewer columns than rows.
rows_range = np.arange(min(photo_data.shape[:2]))
cols_range = rows_range
print(type(rows_range))
In [109]:
photo_data[rows_range, cols_range] = 255
plt.figure(figsize = (15, 15))
plt.imshow(photo_data)
Out[109]:
In [110]:
%matplotlib inline
import numpy as np
from scipy import misc
import matplotlib.pyplot as plt
In [111]:
# scipy.misc.imread was removed in SciPy 1.2; read with matplotlib instead and
# take a writable copy because later cells modify the pixels in place.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
total_rows, total_cols, total_layers = photo_data.shape
# np.ogrid gives an open grid: X holds the row indices and Y the column
# indices, and the two broadcast against each other in later expressions
X, Y = np.ogrid[: total_rows, : total_cols]
print('X shape is', X.shape, 'Y shape is', Y.shape)
In [112]:
# calculate the center point
center_row, center_col = total_rows / 2, total_cols / 2
# squared distance of every pixel from the center (no square root is taken)
dist_from_center = (X - center_row) ** 2 + (Y - center_col) ** 2
# squared radius, so it can be compared directly with the squared distance
radius = (total_rows / 2) ** 2
# mask: True for the pixels that lie outside the circle
circular_mask = (dist_from_center > radius)
In [113]:
photo_data[circular_mask] = 0
plt.figure(figsize = (10, 10))
plt.imshow(photo_data)
Out[113]:
Further masking: let's get just the upper half disc
In [114]:
X, Y = np.ogrid[: total_rows, : total_cols]
half_upper = X < center_row # generates a mask for all rows above center
# circular_mask (built in an earlier cell) is True outside the circle, so the
# combined mask selects pixels that are above the center row AND outside the circle
half_upper_mask = np.logical_and(half_upper, circular_mask)
In [115]:
# scipy.misc.imread was removed in SciPy 1.2; read with matplotlib instead and
# take a writable copy because the masked pixels are overwritten below.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
# paint every pixel selected by half_upper_mask white
photo_data[half_upper_mask] = 255
plt.imshow(photo_data)
Out[115]:
In [116]:
import random
# scipy.misc.imread was removed in SciPy 1.2; read with matplotlib instead and
# take a writable copy because the masked pixels are overwritten below.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
# randint is called once, so every masked pixel receives the same value;
# the value differs between runs because no seed is set
photo_data[half_upper_mask] = random.randint(200, 255)
plt.imshow(photo_data)
Out[116]:
Let's try to highlight all the high altitude areas by detecting the high intensity Red pixels and muting down other areas
In [117]:
# scipy.misc.imread was removed in SciPy 1.2; read with matplotlib instead and
# take a writable copy because the masked pixels are overwritten below.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
# True where the Red layer (index 0) is below 150; those pixels are blacked out
red_mask = photo_data[:, :, 0] < 150
photo_data[red_mask] = 0
plt.imshow(photo_data);
In [118]:
# the same as before but for the Green pixels
# scipy.misc.imread was removed in SciPy 1.2; read with matplotlib instead and
# take a writable copy because the masked pixels are overwritten below.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
# True where the Green layer (index 1) is below 150; those pixels are blacked out
green_mask = photo_data[:, :, 1] < 150
photo_data[green_mask] = 0
plt.imshow(photo_data);
In [119]:
# the same as before but for the Blue pixels
# scipy.misc.imread was removed in SciPy 1.2; read with matplotlib instead and
# take a writable copy because the masked pixels are overwritten below.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
# True where the Blue layer (index 2) is below 150; named blue_mask because it
# is built from the blue layer (the original stored it in green_mask)
blue_mask = photo_data[:, :, 2] < 150
photo_data[blue_mask] = 0
plt.imshow(photo_data);
Composite mask for all three layers
In [120]:
# scipy.misc.imread was removed in SciPy 1.2; read with matplotlib instead and
# take a writable copy because the masked pixels are overwritten below.
photo_data = np.array(plt.imread('/home/jayme/Courses/Python4DS/Week-3-Numpy/wifire/sd-3layers.jpg'))
red_mask = photo_data[:, :, 0] < 150
green_mask = photo_data[:, :, 1] > 100
blue_mask = photo_data[:, :, 2] < 100
# np.logical_and combines exactly two inputs; a third positional argument is
# its `out` array, so passing blue_mask there ignored the blue condition and
# overwrote blue_mask. Chain `&` to require all three conditions.
final_mask = red_mask & green_mask & blue_mask
photo_data[final_mask] = 0
plt.imshow(photo_data);
(2017-07-25 15:13)