In [2]:
# import numpy by following the convention
import numpy as np
In [2]:
# build arrays by hand from Python lists
# one-dimensional case: a flat list becomes a 1-D array
a = np.array([0, 1, 2, 3])
a
Out[2]:
In [3]:
# two-dimensional case: a list of equal-length rows becomes a 2-D array
b = np.array([
    [1, 2, 3],
    [5, 6, 7],
])
b
Out[3]:
In [5]:
# number of dimensions (axes) of each array
np.ndim(a), np.ndim(b)
Out[5]:
In [6]:
# shape of each array: one length per axis
np.shape(a), np.shape(b)
Out[6]:
In [7]:
# functions for creating arrays
# np.arange(start, end, step): end is exclusive
# print this one explicitly, since only a cell's last expression is displayed
print(np.arange(1, 9, 2))
# with a single argument, np.arange counts up from 0
a = np.arange(10)
a
Out[7]:
In [10]:
# np.linspace(start, end, num-points): evenly spaced values
a = np.linspace(0, 1, num=6)
a
Out[10]:
In [14]:
# a 3-by-2 matrix filled with ones; the shape is passed as a tuple
a = np.ones((3, 2))
a
Out[14]:
In [16]:
# a 2-by-3 matrix filled with zeros; the shape is passed as a tuple
a = np.zeros((2, 3))
a
Out[16]:
In [17]:
# a 3-by-3 identity matrix
a = np.eye(3)
a
Out[17]:
In [19]:
# a diagonal matrix: the given values go on the main diagonal
a = np.diag([1, 2, 3, 4])
a
Out[19]:
In [26]:
# generating random numbers
# fix the seed first so the draws below are reproducible
np.random.seed(1234)

# length-4 vector of iid draws from UNIF(0,1)
a = np.random.rand(4)
print(a)

# length-4 vector of iid draws from the standard normal
a = np.random.randn(4)
print(a)
In [28]:
# request a floating-point element type explicitly, then inspect it
a = np.array([1, 2, 3], dtype=float)
a.dtype
Out[28]:
In [29]:
# the element type is inferred from the data: booleans in, boolean array out
a = np.array([True, False, True])
a.dtype
Out[29]:
In [37]:
import matplotlib.pyplot as plt
# to display plots in the notebook
%pylab inline
x = np.linspace(0,3,20)
y = np.linspace(0,9,20)
plt.plot(x,y) # line plot
plt.plot(x,y,'o') # dot plot
Out[37]:
In [40]:
# indices begin at 0; a negative index counts from the end
a = np.arange(10)
a[0], a[1], a[-1]
Out[40]:
In [42]:
# slicing uses [start:end:step]
a[2:5:2]
Out[42]:
In [47]:
# with start, end and step all omitted, the slice spans the whole array
a[:]
Out[47]:
In [48]:
# a step of -1 walks the array from the last element to the first
a[::-1]
Out[48]:
In [52]:
# matrices: a 3-by-3 array with 0, 1, 2 on its diagonal
a = np.diag(np.arange(3))
a
Out[52]:
In [53]:
# pick single elements of the matrix with [row, column]
a[1, 1], a[1, 2]
Out[53]:
In [57]:
# numpy arrays are mutable: assigning to an index overwrites that element in place
a[1, 1] = 10
a
Out[57]:
In [58]:
# second column of a: every row, column index 1
a[:, 1]
Out[58]:
In [59]:
# first row of a: row index 0, every column
a[0, :]
Out[59]:
In [61]:
# slicing returns a view on the original array, not a copy,
# so writing through the slice also changes the original
a = np.arange(10)
b = a[0::2]   # every other element of a
b[0] = 100    # modifies a[0] as well
a
Out[61]:
In [62]:
# force a copy: .copy() detaches the slice from the original array
a = np.arange(10)
b = a[0::2].copy()
b[0] = 100    # only the copy changes
a
Out[62]:
In [64]:
# indexing with booleans: keep only the elements where the mask is True
a = np.arange(10)
ind = a > 5
a[ind]
Out[64]:
In [67]:
# indexing with a list of integers: positions may repeat and come in any order
a = np.arange(10, 100, 10)
a[[2, 3, 4, 2, 1]]
Out[67]:
In [69]:
# operations with scalars apply to every element
a = np.array([1, 2, 3, 4])
# return both results as a tuple: a cell only displays its last expression
a + 1, 2**a
Out[69]:
In [71]:
# arithmetic operations between arrays are elementwise
b = np.ones(4)
# return both results as a tuple: a cell only displays its last expression
a - b, a * b
Out[71]:
In [72]:
# array multiplication with * is elementwise, not a matrix product
c = np.ones((3, 3))
c * c
Out[72]:
In [73]:
# matrix multiplication goes through dot
np.dot(c, c)
Out[73]:
In [75]:
# comparisons are elementwise and produce boolean arrays
# return both results as a tuple: a cell only displays its last expression
a == b, a > b
Out[75]:
In [76]:
# array-wise comparison: one boolean for the two arrays as a whole
np.array_equal(a, b)
Out[76]:
In [77]:
# transcendental functions are applied elementwise
# return all three results as a tuple: a cell only displays its last expression
np.sin(a), np.log(a), np.exp(a)
Out[77]:
In [1]:
# shape mismatches: arrays of length 4 and length 2 cannot be combined
# elementwise, so the addition raises a ValueError.
# Both operands are defined here so the cell does not rely on a leftover `a`,
# and the error is caught and shown so "Run All" continues past this cell.
a = np.array([1, 2, 3, 4])
b = np.array([1, 2])
try:
    a + b
except ValueError as err:
    print("ValueError:", err)
In [81]:
# transposition
# start from the part of a 3-by-3 matrix of ones that lies strictly above the diagonal
a = np.triu(np.ones((3, 3)), k=1)
a.T
Out[81]:
In [84]:
# computing sums: add up every element of the array
a = np.array([1, 2, 3, 4])
a.sum()
Out[84]:
In [88]:
# sums over a 2-D array, overall and along each axis
a = np.array([[1, 2], [3, 4]])
# return all three results as a tuple: a cell only displays its last expression
(
    a.sum(),        # sum of all elements
    a.sum(axis=0),  # column sum
    a.sum(axis=1),  # row sum
)
Out[88]:
In [94]:
# other reductions
a = np.array([1, 2, 3, 4])
# return all results as a tuple: a cell only displays its last expression
(
    a.min(),     # smallest value
    a.max(),     # largest value
    a.argmin(),  # index of the smallest value
    a.argmax(),  # index of the largest value
    a.mean(),    # arithmetic mean
    a.std(),     # standard deviation
)
Out[94]:
In [96]:
# np.newaxis adds a new axis, turning the 1-D array into a 2-D column;
# adding the 1-D b then yields one row per element of a, one column per element of b
a = np.arange(0, 40, 10)[:, np.newaxis]
b = np.array([0, 1, 2])
a + b
Out[96]:
In [98]:
# matrix of absolute differences between every pair of observations:
# x is a 1-D array, y holds the same values as a column
x = np.linspace(0, 10, 5)
y = x[:, np.newaxis]
np.abs(x - y)
Out[98]:
In [100]:
# flattening: ravel lays the rows of the 2-D array out one after another
a = np.array([[1, 2], [3, 4]])
b = np.ravel(a)
b
Out[100]:
In [102]:
# reshape rearranges the same elements into the requested 2-by-2 shape
c = b.reshape(2, 2)
c
Out[102]:
In [104]:
a = np.array([[6, 3, 1],
              [9, 1, 4]])
# sort each row (np.sort returns a sorted copy in b)
b = np.sort(a, axis=1)
b
Out[104]:
In [105]:
# sort each column
c = np.sort(a, axis=0)
c
Out[105]:
In [107]:
# sorting with fancy indexing:
# argsort gives the order of indices that sorts a; indexing a with it sorts a
a = np.array([14, 13, 11, 12])
j = np.argsort(a)
# return both as a tuple: a cell only displays its last expression
j, a[j]
Out[107]:
In [109]:
# finding minima and maxima: positions of the largest and smallest element
a = np.array([4, 22, 3, 9])
# return both as a tuple: a cell only displays its last expression
np.argmax(a), np.argmin(a)
Out[109]:
In [117]:
# Q1
# build the 2-D array without typing it out explicitly:
# x is the first row, y is a column of per-row offsets 0..4
x = np.arange(1, 12, 5)
y = np.arange(5).reshape(5, 1)
z = x + y
z
Out[117]:
In [119]:
# new array made of the 2nd and 4th rows (indices 1 and 3), all columns
m = z[[1, 3], :]
m
Out[119]:
In [121]:
# Q2
# divide each column of the array elementwise
a = np.arange(25).reshape(5, 5)
# printed explicitly: a bare `a` in the middle of a cell displays nothing
print(a)
b = np.array([1, 5, 10, 15, 20])
# turn b into a column so row i of a is divided by b[i]
b = b[:, np.newaxis]
# np.true_divide always returns floating-point quotients; with two integer
# arrays, `a / b` would floor the results under Python 2
np.true_divide(a, b)
Out[121]:
In [124]:
# Q3
# generate a 10 by 3 array of random numbers (seeded for reproducibility)
np.random.seed(1234)
a = np.random.rand(10, 3)
a
Out[124]:
In [132]:
# for each row, pick the number closest to 0.5
b = np.abs(a - 0.5)            # distance of every entry from 0.5
ind = np.argmin(b, axis=1)     # per row, the column with the smallest distance
# pair each row index with its chosen column; the row count is taken from a
# itself rather than hard-coded
c = a[np.arange(a.shape[0]), ind]
c
Out[132]:
In [142]:
# pure-Python baseline: time squaring 1000 integers with a list comprehension
a = range(1000)
%timeit [i**2 for i in a]
In [143]:
# numpy counterpart: time squaring the same 1000 integers as one array operation
b = np.arange(1000)
%timeit b**2
In [2]:
# pure-Python baseline: time adding 1 to 10000 integers with a list comprehension
a = range(10000)
%timeit [i+1 for i in a]
In [5]:
# numpy counterpart: time adding 1 to the same 10000 integers as one array operation
c = np.arange(10000)
%timeit c+1
Python for Data Analysis
Formatting
In [3]:
# download an HTML snippet and render it in the output area
# (presumably the notebook's custom styling, given the "Formatting" heading; TODO confirm)
# NOTE(review): this renders markup fetched from a shortened URL; confirm the
# target is trusted before running.
from contextlib import closing

from IPython.display import HTML

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# closing() releases the connection on both Python versions
with closing(urlopen('http://bit.ly/1Ki3iXw')) as response:
    # NOTE(review): assumes the payload is UTF-8 text; confirm
    page = response.read().decode('utf-8')
HTML(page)
Out[3]: