In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
dir(list)
Out[2]:
In [3]:
class Rectangle(object):
    """
    Rectangular object.

    Requires a 2 x 5 np.array of points in the plane: row 0 is passed to the
    plot as x values and row 1 as y values. The points are traversed
    counterclockwise, with the first point repeated as the last.
    """

    def __init__(self, coords=None):
        """
        Store the corner coordinates on the instance.

        C++/Java/Fortran/etc. programmers - this plays the role of the
        constructor.
        """
        self.coords = coords

    def plot(self, **kwargs):
        """
        Draw the rectangle with plt.fill_between.

        An ordinary (instance) method - methods are generally public in
        Python. Keyword arguments are forwarded unchanged to fill_between.
        """
        xs = self.coords[0]
        ys = self.coords[1]
        plt.fill_between(xs, ys, **kwargs)
In [4]:
# Corners traversed counterclockwise with the first point repeated at the end,
# i.e. the closed 2 x 5 array that the Rectangle docstring asks for.
unit_square = Rectangle(coords=np.array([[0, 1, 1, 0, 0], [0, 0, 1, 1, 0]]))
In [5]:
dir(unit_square)
Out[5]:
In [6]:
unit_square.plot(lw=5)
In [7]:
# Draw the unit square on a fresh figure, then widen both axes to [-1, 2]
# so the shape does not fill the whole frame.
fig, ax = plt.subplots()
unit_square.plot(lw=5)
ax.set_xlim(-1, 2)
ax.set_ylim(-1, 2);
In [8]:
class Rectangle(object):
    """
    Rectangular object.

    Requires a 2 x 5 np.array corresponding to four points in the plane
    (row 0: x values, row 1: y values), traversed counterclockwise, with two
    sides parallel to the horizontal axis and the first set of coordinates
    repeated as the last.
    """

    def __init__(self, coords=None):
        """Keep a reference to the coordinate array."""
        self.coords = coords

    def plot(self, **kwargs):
        """
        Basic mechanism to plot the rectangle; keyword arguments are passed
        straight to plt.fill_between.
        """
        xs = self.coords[0]
        ys = self.coords[1]
        plt.fill_between(xs, ys, **kwargs)

    def get_area(self):
        """
        Basic area function: (x extent) * (y extent) of the stored points.

        Class methods are documented just like any other function.
        """
        xs, ys = self.coords[0], self.coords[1]
        width = np.max(xs) - np.min(xs)
        height = np.max(ys) - np.min(ys)
        return width * height
In [9]:
# A 2 (wide) x 1 (tall) rectangle given as a closed outline: five points,
# the last one repeating the first.
rect = Rectangle(coords=np.array([[0, 2, 2, 0, 0], [0, 0, 1, 1, 0]]))
# %d formats the area as an integer
print('rectangle area = %d' % rect.get_area())
rect.plot(lw=5)
In [10]:
dir(rect)
Out[10]:
In [11]:
rect.__dir__()
Out[11]:
In [12]:
# list.sort() sorts in place and returns None, so comparing its return values
# is always None == None. sorted() returns the sorted lists, so this really
# compares the attribute names reported by dir() and __dir__().
sorted(dir(rect)) == sorted(rect.__dir__())
Out[12]:
Check out our documentation:
In [13]:
rect.plot?
Objects are dicts in a sense: an instance stores its attributes in a dictionary, exposed through the magic `__dict__` attribute (an attribute, not a function).
In [14]:
rect.__dict__
Out[14]:
In [15]:
# objects are the most basic Python types
class EuclideanShape2D(object):
    """
    Generic base class for two-dimensional shapes.

    All shapes have an area; get_area() and plot() here are placeholders
    meant to be overridden by concrete shapes.
    """

    def __init__(self):
        pass

    def get_area(self):
        """Placeholder: does nothing and returns None."""
        pass

    def plot(self, **kwargs):
        """
        Fallback for shapes that do not define their own plot method.

        Keyword arguments are accepted and ignored, so a call such as
        shape.plot(lw=5) prints the notice below instead of raising a
        TypeError.
        """
        print('no plot method defined - TO DO')
In [16]:
object?
In [17]:
class Rectangle(EuclideanShape2D):
    """
    Rectangular object.

    Requires a 2 x 5 np.array corresponding to four points in the plane
    (row 0: x values, row 1: y values), traversed counterclockwise, with two
    sides parallel to the horizontal axis.
    """

    def __init__(self, coords=None):
        """Keep a reference to the coordinate array."""
        self.coords = coords

    def plot(self, **kwargs):
        """
        Basic mechanism to plot the rectangle; keyword arguments are passed
        straight to plt.fill_between.
        """
        xs = self.coords[0]
        ys = self.coords[1]
        plt.fill_between(xs, ys, **kwargs)

    def get_area(self):
        """Basic area function: (x extent) * (y extent) of the stored points."""
        xs, ys = self.coords[0], self.coords[1]
        width = np.max(xs) - np.min(xs)
        height = np.max(ys) - np.min(ys)
        return width * height
In [18]:
class Circle(EuclideanShape2D):
    """
    Circular object.

    Requires a center, indexed as (x, y), and a radius.
    """

    def __init__(self, center=None, radius=None):
        self.center = center
        self.radius = radius

    def plot(self, **kwargs):
        """
        Draw the circle on a new 6 x 6 figure.

        Both axes span the circle plus a margin of one unit on each side.
        Keyword arguments (e.g. lw, color) are forwarded to plt.Circle.
        """
        cx, cy = self.center[0], self.center[1]
        margin = self.radius + 1
        patch = plt.Circle(self.center, self.radius, **kwargs)
        fig, ax = plt.subplots(figsize=(6, 6))
        # x limits are centred on the x coordinate, y limits on the y coordinate
        ax.set_xlim(cx - margin, cx + margin)
        ax.set_ylim(cy - margin, cy + margin)
        ax.add_artist(patch)

    def get_area(self):
        """Return pi * radius**2."""
        return np.pi*self.radius**2

    def get_circumference(self):
        """Return 2 * pi * radius."""
        return 2*np.pi*self.radius
In [19]:
circy = Circle(center=(1,1), radius=3)
circy.plot(lw=5)
In [20]:
circy.get_circumference()
Out[20]:
In [21]:
class Triangle(EuclideanShape2D):
    """
    Triangle class.

    Requires a 2 x 3 np array corresponding to three points in the plane
    (row 0: x values, row 1: y values). Assumes the base is parallel to the
    x-axis and that the starting point is the lower left corner.
    """

    def __init__(self, coords):
        self.coords = coords

    def get_area(self):
        """Return one half of base times height."""
        # base: horizontal distance between the first two points
        base = self.coords[0, 1] - self.coords[0, 0]
        # height: vertical distance from the second point to the third
        height = self.coords[1, 2] - self.coords[1, 1]
        return 0.5 * base * height
    # no plotting method defined - what happens?
In [22]:
tri = Triangle(coords=np.array([[0, 2, 1], [0, 0, 3]]))
tri.get_area()
Out[22]:
In [23]:
# Triangle defines no plot() of its own, so this call resolves to the
# method inherited from EuclideanShape2D.
tri.plot()
In [24]:
from scipy.stats import bernoulli
In [25]:
?bernoulli
In [26]:
dir(bernoulli)
Out[26]:
Lots of stuff - what do the single and double underscores all mean? See PEP 8: https://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
In [27]:
bernoulli.__doc__
Out[27]:
Here is how we can use some of these magic methods to build a custom data structure - a list with additional capabilities, like 'head' and 'tail.'
In [28]:
class SpecialList:
    '''
    A class slightly extending the capabilities of list.

    Wraps a list in self.values, delegates the usual container protocol
    (len, indexing, item assignment/deletion, iteration) to it, and adds
    head() and tail(). A list passed as `values` is stored by reference,
    not copied.
    '''

    def __init__(self, values=None):
        # None (rather than []) as the default gives every instance its own list
        self.values = [] if values is None else values

    def __len__(self):
        return len(self.values)

    def __getitem__(self, key):
        return self.values[key]

    def __setitem__(self, key, value):
        self.values[key] = value

    def __delitem__(self, key):
        del self.values[key]

    def __iter__(self):
        return iter(self.values)

    def __reversed__(self):
        '''
        Return a new SpecialList holding the values in reverse order.
        '''
        # Materialise the reversed iterator: wrapping the bare iterator would
        # give an object on which len(), indexing, head() and tail() all fail.
        return SpecialList(list(reversed(self.values)))

    def append(self, value):
        self.values.append(value)

    def head(self, n=5):
        '''
        Return the first n values as a plain list.
        '''
        return self.values[:n]

    def tail(self, n=5):
        '''
        Return the last n values as a plain list.
        '''
        # values[-0:] is values[0:], i.e. everything, so n == 0 needs its own case
        if n == 0:
            return []
        return self.values[-n:]
In [29]:
x = SpecialList(values=list(range(100)))
x.head()
Out[29]:
In [30]:
x.head(10)
Out[30]:
In [31]:
len(x)
Out[31]:
In [32]:
y=iter(x)
In [33]:
next(y)
Out[33]:
In [34]:
next(y)
Out[34]:
In [35]:
del x[5] # behavior defined by __delitem__!
In [36]:
x.head(10)
Out[36]:
In [37]:
# y is the iterator created before the deletion; it walks the same
# underlying list as x and resumes at position 2.
next(y)
Out[37]:
In [38]:
def sigmoid(x):
    """
    Logistic function 1 / (1 + exp(-x)), evaluated with np.exp.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def ll(act, pred, threshold=1e-15):
    """
    Mean negative log-likelihood (log loss) of predictions `pred` against
    actual values `act`.

    Predictions are first bounded to [threshold, 1 - threshold] so that
    neither logarithm is evaluated at zero.
    """
    bounded = np.minimum(1 - threshold, np.maximum(pred, threshold))
    per_sample = (1 - act) * np.log(1 - bounded) + act * np.log(bounded)
    scale = -1 / len(act)
    return scale * np.sum(per_sample)
In [39]:
class SGDClassifier(object):
    """
    Logistic-regression style classifier trained by stochastic gradient
    descent, with randomly initialised weights.

    data      : 2-D array indexed as data[row, column]; plot() additionally
                requires exactly 3 columns and treats columns 1 and 2 as the
                plotted coordinates (presumably column 0 is a constant bias
                term - confirm against the data source)
    label     : per-row targets, indexed the same way as the rows of data
    alpha     : learning rate (default 0.0001)
    max_epochs: number of passes over the training rows (default 10000)
    """

    def __init__(self, data=None, label=None, alpha=None, max_epochs=None):
        # don't introduce a new class attribute outside of init!
        self.data = data
        self.label = label
        if data is not None:
            n_features = self.data.shape[1]
            self.w = np.random.randn(n_features) / np.sqrt(n_features)  # Xavier initialization
        else:
            self.w = None
        self.alpha = 0.0001 if alpha is None else alpha
        self.max_epochs = 10000 if max_epochs is None else max_epochs
        # losses recorded every 100 epochs by fit()
        self.train_losses = []
        self.val_losses = []

    def fit(self):
        """
        Train the weights in place.

        A random 80% of the rows is used for training and the remaining rows
        for validation. Every 100 epochs the training and validation losses
        are appended to train_losses / val_losses and printed. Prints a
        message and returns without training when data or labels are missing.
        """
        if self.data is None:
            print('No data, nothing to fit')
            return
        if self.label is None:
            print('No labels, can\'t fit')
            return
        # hold-out validation split (a single random 80/20 split, not k-fold)
        n_samples = self.data.shape[0]
        train_idx = np.random.choice(range(n_samples), replace=False,
                                     size=int(np.floor(0.8 * n_samples)))
        # set membership keeps the split linear in n_samples; testing
        # `i in train_idx` on the array rescans it for every i
        in_train = set(train_idx.tolist())
        val_idx = [i for i in range(n_samples) if i not in in_train]
        trainX = self.data[train_idx, :]
        valX = self.data[val_idx, :]
        trainy = self.label[train_idx]
        valy = self.label[val_idx]
        n = trainX.shape[0]
        for i in range(self.max_epochs):
            # Update weights - where the magic happens
            for j in range(n):
                self.w += self.alpha * (trainy[j] -
                                        sigmoid(np.dot(trainX[j, :], self.w))) * trainX[j, :]
            if i % 100 == 0:
                current_train_loss = ll(trainy, sigmoid(np.dot(trainX, self.w)))
                self.train_losses.append(current_train_loss)
                current_val_loss = ll(valy, sigmoid(np.dot(valX, self.w)))
                self.val_losses.append(current_val_loss)
                print('epoch {}: train loss {:.5f}\tvalidation loss {:.5f}'.format(i, current_train_loss, current_val_loss))

    def predict(self):
        """Not implemented yet; currently returns None."""
        # TO_DO
        pass

    def plot(self, **kwargs):
        """
        Plot the line w[0] + w[1]*x + w[2]*y = 0 together with the data.

        Keyword arguments are forwarded to plt.plot for the line. Rows whose
        label equals 0 are drawn in red and all other rows in blue; without
        labels the first 500 rows are red and the rest blue. Prints a message
        and returns when there is no data or the data does not have exactly
        3 columns.
        """
        if self.data is None:
            print('No data, nothing to plot')
            return
        if self.data.shape[1] != 3:
            print('wrong dimensions for plot != 2')
            return
        x = np.linspace(-5, 5, 100)
        plt.plot(x, -self.w[0] / self.w[2] - (self.w[1] / self.w[2]) * x, **kwargs)
        if self.label is not None:
            # colour by actual class membership rather than by row position;
            # assumes one label per data row
            first_class = np.asarray(self.label) == 0
        else:
            first_class = np.arange(self.data.shape[0]) < 500
        plt.scatter(self.data[first_class, 1], self.data[first_class, 2], color='red')
        plt.scatter(self.data[~first_class, 1], self.data[~first_class, 2], color='blue')
        plt.ylim(-5, 5)
In [40]:
# grab our synthetic data from last time
# Both paths are relative to the notebook's working directory.
# NOTE(review): SGDClassifier.plot requires 3 columns in `data` - confirm the file layout.
data = np.genfromtxt('../data/synthetic_data.txt', delimiter=' ')  # space-delimited values
label = np.genfromtxt('../data/label.txt')  # genfromtxt's default delimiter
In [41]:
d =SGDClassifier(data=data, label=label, alpha=0.0001, max_epochs=500)
In [42]:
whos
In [43]:
d.w
Out[43]:
In [44]:
d.fit()
In [45]:
d.plot(color='steelblue', lw=5)
In [46]:
d.w
Out[46]:
In [47]:
d.val_losses[-1]
Out[47]:
In [48]:
# A brand-new instance draws its own random initial weights in __init__
# (alpha and max_epochs fall back to their defaults); nothing has been fit yet.
SGDClassifier(data=data, label=label).w
Out[48]:
In [49]:
# messier data: two Gaussian clouds of 500 points each, centred at (1, 1)
# and (3, 3), both with standard deviation 3
x1 = np.random.normal(loc=1, scale=3, size=500)
x2 = np.random.normal(loc=3, scale=3, size=500)
y1 = np.random.normal(loc=1, scale=3, size=500)
y2 = np.random.normal(loc=3, scale=3, size=500)
x = np.hstack((x1, x2))
y = np.hstack((y1, y2))
# one row per point: a constant 1 followed by the x and y coordinates
ones = np.ones(1000)
data = np.vstack((ones, x, y)).T
# the first cloud is labelled 0, the second 1
lab1, lab2 = np.zeros(500), np.ones(500)
labs = np.hstack((lab1, lab2)).T
In [50]:
d2 = SGDClassifier(data, labs, max_epochs=500)
d2.fit()
In [51]:
d2.plot(lw=5)
In [52]:
d2.val_losses[-1]
Out[52]:
In [ ]: