In [1]:
import numpy as np
x = np.array([1, 2, 3])
y = np.zeros((2, 2))
z = np.ones((2, 2))
a = np.eye(2)
print(x)
print(y)
print(z)
print(a)
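A quick, illustrative way to confirm what each constructor produced is to inspect the shape, dimensionality, and dtype:
In [ ]:
# Inspect shape, number of dimensions, and dtype of each array built above
for arr in (x, y, z, a):
    print(arr.shape, arr.ndim, arr.dtype)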
In [2]:
# Be aware of the data type: a NumPy array holds a single dtype, so mixed inputs are upcast
np.array([1.0, 1])
Out[2]:
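A quick check of the resulting dtype (the exact integer dtype is platform dependent):
In [ ]:
# Mixing an int and a float upcasts everything to float64
mixed = np.array([1.0, 1])
print(mixed.dtype)                 # float64
print(np.array([1, 1]).dtype)      # typically int64 (platform dependent)
print(mixed.astype(np.int64))      # explicit cast back to integers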
In [3]:
import sys
# How big are these things?
n = 10
print(sys.getsizeof(int(1)), sys.getsizeof(np.array(1)), sys.getsizeof([1]))
print(sys.getsizeof(np.ones((n))),
sys.getsizeof([1. for i in range(0, n)]))
print(sys.getsizeof(np.ones((n), dtype = np.float32)),
sys.getsizeof([1. for i in range(0, n)]))
print(sys.getsizeof(np.ones((n), dtype = np.float32)/2),
sys.getsizeof([1./2 for i in range(0, n)]))
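For the raw data size alone, the array's own attributes are often more informative than sys.getsizeof (a small sketch; exact byte counts vary by NumPy and Python version):
In [ ]:
# .itemsize is bytes per element, .nbytes is the data buffer only (no object overhead)
arr64 = np.ones(n)                      # float64 by default
arr32 = np.ones(n, dtype=np.float32)
print(arr64.itemsize, arr64.nbytes)     # 8 bytes per element
print(arr32.itemsize, arr32.nbytes)     # 4 bytes per element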
In [5]:
# Dimensions are important! Be careful how big the arrays get
dims = 5
D = 10  # array lengths must be integers; a float here raises a TypeError
print(sys.getsizeof(np.ones([D for i in range(0, dims)])))
In [6]:
a = np.eye(2)
print(a)
In [7]:
# Referencing a single entry
a[0, 0]
Out[7]:
In [8]:
# Assignment
a[0, 0] = 2
print(a)
In [9]:
# Also supports slicing
print(a[0, :])
print(a[1, :])
In [10]:
# Plain assignment does not copy: b is just another name for the same array
a = np.eye(2)
b = a
print(a)
print(b)
In [11]:
# Mutating a therefore changes what b sees, since both names reference the same data
a[0, 0] = 0
print(a)
print(b)
In [12]:
# If you need a copy, be explicit
a = np.eye(2)
b[:] = a      # copies a's values into b's existing buffer
c = a.copy()  # allocates a new, independent array
print(a)
print(b)
print(c)
In [13]:
a[0, 0] = 0
print(a)
print(b)
print(c)
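If it is ever unclear whether two names share data, np.shares_memory gives a direct check (illustrative):
In [ ]:
# Views and plain assignments share the buffer; .copy() does not
a = np.eye(2)
view = a            # same underlying data
sliced = a[0, :]    # slices are views too
copied = a.copy()   # independent buffer
print(np.shares_memory(a, view), np.shares_memory(a, sliced), np.shares_memory(a, copied))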
In [14]:
a = np.eye(2)
b = np.eye(2)
c = 2
# Multiplication with * is done elementwise (it is not matrix multiplication)
print(a * b)
In [15]:
# Scalar multiplication is broadcast
print(a * c)
In [16]:
# Scalar addition is broadcast in the same way
print(a + c)
In [19]:
# Some arrays to broadcast
d = np.array([1, 2])
e = np.vstack((a, np.zeros(2)))
f = np.array([[1, 2], [3, 4]])
In [20]:
print(d,e,f)
In [21]:
# Broadcasting requires that each pair of dimensions either match or one of them equal one
print(e.shape)
print(e)
print(d.shape)
print(d)
print(d + e)
# e + a raises a ValueError: shapes (3, 2) and (2, 2) cannot be broadcast together
# print(e + a)
In [22]:
# Notice that d is duplicated along the leading (row) axis
print(d)
print(e)
print(d + e)
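One way to see in advance whether two shapes are compatible is to ask NumPy directly (np.broadcast_shapes needs NumPy >= 1.20; this is just a sanity-check sketch):
In [ ]:
# (3, 2) and (2,) broadcast to (3, 2); (3, 2) and (2, 2) do not broadcast at all
print(np.broadcast_shapes(e.shape, d.shape))
try:
    np.broadcast_shapes(e.shape, a.shape)
except ValueError as err:
    print(err)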
In [23]:
# A 1-D array is neither a column nor a row vector (its transpose is itself),
# which can make the broadcasting behavior look ambiguous
print(d.shape)
print(d == d.T)
In [25]:
d = np.array([1., 1.])
print(a + d)
In [26]:
# Broadcasting aligns shapes starting FROM THE RIGHTMOST axis
print(f.shape)
print(f)
print(d.shape)
print(d)
print(f + d)
print(f + d.T)
In [27]:
# You can change this by adding a new axis,
# which makes the intended shape explicit
print(d[:, np.newaxis].shape)
print(f + d[:, np.newaxis])
In [36]:
import time
length = 10
a = [i for i in range(0, length)]
b = [i for i in range(0, length)]
c = []
t0 = time.time()
for i in range(len(a)):
    c.append(a[i]*b[i])
t1 = time.time()
print("Process executed in : %s : seconds." %(t1 - t0))
In [37]:
a
Out[37]:
In [43]:
a = np.arange(0, length).reshape(length, 1)
b = np.arange(0, length)
t0 = time.time()
C = a * b  # broadcasting (10, 1) * (10,) gives a 10x10 outer product, not the elementwise product above
t1 = time.time()
print("Process executed in : %s : seconds." %(t1 - t0))
In [44]:
a
Out[44]:
In [42]:
a[:, np.newaxis].shape
Out[42]:
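The arrays above are tiny and the NumPy line actually forms an outer product, so as a rough apples-to-apples sketch, here is the same elementwise product at a larger size (timings are illustrative only):
In [ ]:
length = 10**6
a_list = list(range(length))
b_list = list(range(length))
t0 = time.time()
c_list = [a_list[i]*b_list[i] for i in range(length)]
t1 = time.time()
print("List comprehension : %s : seconds." % (t1 - t0))
a_arr = np.arange(length)
b_arr = np.arange(length)
t0 = time.time()
c_arr = a_arr * b_arr
t1 = time.time()
print("NumPy elementwise : %s : seconds." % (t1 - t0))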
In [45]:
import scipy.integrate
def integrand(x, k):
    return k*np.exp(-k*x)
k = 1.0
scipy.integrate.quad(integrand, 0, np.inf, args=(k,))
Out[45]:
In [46]:
# Note: quad wraps the Fortran QUADPACK adaptive quadrature routines;
# for an infinite interval it transforms the variable and integrates adaptively
scipy.integrate.quad(lambda x: k*np.exp(-k*x), 0, np.inf)
Out[46]:
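Since the integrand is an exponential density, the exact value of the integral is 1, which gives a quick sanity check on quad's value and reported error bound:
In [ ]:
value, abserr = scipy.integrate.quad(integrand, 0, np.inf, args=(k,))
print(value, abserr, abs(value - 1.0))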
Methods:
"Downhill simplex method" (Nelder-Mead). Maintains a simplex of n+1 vertices in n dimensions and updates it with simple reflection and contraction steps (loosely reminiscent of bisection) to find a local optimum; no derivatives required.
"Broyden-Fletcher-Goldfarb-Shanno (BFGS) Algorithm". A "quasi-Newton" method: a Newton step would use the exact Hessian, whereas quasi-Newton methods build an approximation to it from successive gradient evaluations.
"Powell's Conjugate Direction Method". Starts from coordinate-wise ("taxi-cab") line searches and gradually replaces them with conjugate directions built from the overall progress; no gradients are needed.
"Conjugate Gradient Method". The nonlinear variant of the CG method best known for sparse linear systems. You'll notice here it is unsuccessful.
In [47]:
import scipy.optimize
import time
def rosenbrock(x, a, b):
    return (a - x[0])**2 + b*(x[1] - x[0]**2)**2
a = 1.
b = 100.
x0 = np.array([2., 3.])
t0 = time.time()
res = scipy.optimize.minimize(rosenbrock, x0, args=(a, b), method='Nelder-Mead')
t1 = time.time()
print("\nProcess executed in : %s : seconds.\n" %(t1 - t0))
print(res)
t0 = time.time()
res = scipy.optimize.minimize(rosenbrock, x0, args=(a, b), method='BFGS')
t1 = time.time()
print("\nProcess executed in : %s : seconds.\n" %(t1 - t0))
print(res)
t0 = time.time()
res = scipy.optimize.minimize(rosenbrock, x0, args=(a, b), method='Powell')
t1 = time.time()
print("\nProcess executed in : %s : seconds. \n" %(t1 - t0))
print(res)
t0 = time.time()
res = scipy.optimize.minimize(rosenbrock, x0, args=(a, b), method='CG')
t1 = time.time()
print("\nProcess executed in : %s : seconds. \n" %(t1 - t0))
print(res)
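The Rosenbrock function has its minimum at (a, a**2) = (1, 1), so each result can be checked against the known optimum (here res holds the last, CG, result):
In [ ]:
print(res.x)
print(np.abs(res.x - np.array([a, a**2])))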
Methods:
"Hybrid". From MINPACK, essentially a modified Powell method.
"Broyden's Method". A quasi-Newton method for multidimensional root finding: compute the Jacobian (or an initial approximation) once, then apply a cheap rank-one update at each iteration.
"Anderson Mixing". A quasi-Newton method. Approximates the Jacobian by the "best" solution in the space spanned by the last M vectors... whatever that means!
"Linear Mixing". Similar in spirit to Anderson mixing, but with a single fixed mixing parameter.
"Krylov Methods". Approximate the inverse Jacobian using a basis for the Krylov subspace. Very neat.
In [48]:
def f(x, a, b):
    return np.array([a*(1 - x[0]), b*(x[1] - x[0]**2)**2])
a = 1.
b = 100.
x0 = np.array([10., 2.])
t0 = time.time()
sol = scipy.optimize.root(f, x0, args=(a, b), method='hybr')
t1 = time.time()
print("\nProcess executed in : %s : seconds. \n" %(t1 - t0))
print(sol)
t0 = time.time()
sol = scipy.optimize.root(f, x0, args=(a, b), method='broyden1')
t1 = time.time()
print("\nProcess executed in : %s : seconds. \n" %(t1 - t0))
print(sol)
t0 = time.time()
sol = scipy.optimize.root(f, x0, args=(a, b), method='anderson')
t1 = time.time()
print("\nProcess executed in : %s : seconds. \n" %(t1 - t0))
print(sol)
t0 = time.time()
sol = scipy.optimize.root(f, x0, args=(a, b), method='linearmixing')
t1 = time.time()
print("\nProcess executed in : %s : seconds. \n" %(t1 - t0))
print(sol)
t0 = time.time()
sol = scipy.optimize.root(f, x0, args=(a, b), method='krylov')
t1 = time.time()
print("\nProcess executed in : %s : seconds. \n" %(t1 - t0))
print(sol)
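Whichever method is used, it is worth checking the residual at the reported root (sol here is the Krylov result):
In [ ]:
print(sol.x)
print(f(sol.x, a, b))   # should be close to zero at a true root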
In [49]:
import matplotlib.pyplot as plt
%matplotlib inline
x = np.arange(0, np.pi, 0.01)
y = np.cos(x)
plt.plot(x, y)
plt.show()
In [55]:
x = np.arange(0, np.pi, 0.01)
y = np.cos(x)
plt.plot(x, y)
#Add axis labels
plt.xlabel('Lunch Seminars Per Week')
plt.ylabel('Marginal Productivity of Doctoral Students')
#Add title
plt.title("Marginal Productivity of Doctoral\n"
          + "Students as a Function\n"
          + " of Lunch Seminars Per Week")
#Add emphasis to important points
points = np.array([1.0, 2.5])
plt.plot(points, np.cos(points), 'ro')
#Add a label and legend to the points
plt.plot(points, np.cos(points), 'o', label='Nap Thresholds')
#plt.legend()
#But the legend is poorly placed, so move it to a better spot
plt.legend(loc=2)
plt.show()
In [56]:
x = np.arange(0, 10, 0.1)
f = lambda x: np.cos(x)
g = lambda x: np.sin(x)
h = lambda x: x**2
i = lambda x: np.log(x + 0.00001)
#Create the figure and axes objects (the sharex and sharey options would let them share axes)
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
#Plot on the first axes object. Notice, you can plot several times on the same object
ax1.plot(x, f(x))
ax1.plot(x, g(x))
#Plot on the second axes object
ax2.plot(x, f(x)*g(x))
#Plot on the third axes object
ax3.plot(x, h(x))
#Plot on the fourth axes object
ax4.plot(x, i(x))
plt.show()
In [ ]:
%matplotlib qt
In [ ]:
%matplotlib inline
In [57]:
from mpl_toolkits.mplot3d import Axes3D
def U(c1, c2, beta, gamma):
    return c1**(1 - gamma)/(1 - gamma) + beta*c2**(1 - gamma)/(1 - gamma)
beta = 0.98
gamma = 2.0
fig = plt.figure()
ax = fig.add_subplot(projection="3d")  # fig.gca(projection=...) was removed in newer Matplotlib
low = 1.0
high = 10.0
c1, c2 = np.arange(low, high, 0.1), np.arange(low, high, 0.1)
#c2 = np.arange(low, high, 0.1)
C1, C2 = np.meshgrid(c1, c2)
utils = U(C1, C2, beta, gamma)
ax.plot_surface(C1, C2, utils, alpha=0.3)
cset = ax.contour(C1, C2, utils, zdir='z', offset=-2.0)
#cset = ax.contour(C1, C2, utils, zdir='y', offset=10.0)
#cset = ax.contour(C1, C2, utils, zdir='x', offset=1.0)
plt.show()
In [60]:
import pandas as pd
DF = pd.read_csv('http://people.stern.nyu.edu/wgreene/Econometrics/gasoline.csv')
In [61]:
print(DF.head())
DF.head()
Out[61]:
In [62]:
DF.shape
Out[62]:
In [65]:
DF.describe()
Out[65]:
In [66]:
DF.columns
Out[66]:
In [70]:
DF.Y.plot()
Out[70]:
In [67]:
DF['Y'].plot()
Out[67]:
In [71]:
DF.columns = DF.columns.str.lower()
print(DF.columns)
In [72]:
DF2 = DF.set_index('year')
DF2.head()
Out[72]:
In [73]:
print(DF2.loc[1953])
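With year as the index, .loc also supports label-based slices (both endpoints included); the exact years shown assume the sample begins in 1953 as in the cell above:
In [ ]:
print(DF2.loc[1953:1960])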
In [ ]: