In [33]:
# shows how linear regression analysis can be applied to 1-dimensional data

import numpy as np
import matplotlib.pyplot as plt

In [34]:
# define the data
X = [0,1,2,3,4]
Y = [1,3,7,13,21]
    
print(X)
print(Y)


[0, 1, 2, 3, 4]
[1, 3, 7, 13, 21]

In [35]:
# let's turn X and Y into numpy arrays since that will be useful later
X = np.array(X)
Y = np.array(Y)

In [36]:
# let's plot the data to see what it looks like
plt.scatter(X,Y)
plt.show()

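For reference, the next cell applies the closed-form least-squares solution for the line y = ax + b (a restatement of "the equations we learned", i.e. what you get by setting the partial derivatives of the squared error to zero):

a = \frac{\sum_i x_i y_i - \bar{y}\,\sum_i x_i}{\sum_i x_i^2 - \bar{x}\,\sum_i x_i},
\qquad
b = \frac{\bar{y}\,\sum_i x_i^2 - \bar{x}\,\sum_i x_i y_i}{\sum_i x_i^2 - \bar{x}\,\sum_i x_i}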
In [42]:
# apply the equations we learned to calculate a and b

denominator = X.dot(X) - X.mean() * X.sum()
a = ( X.dot(Y) - Y.mean()*X.sum() ) / denominator
b = ( Y.mean() * X.dot(X) - X.mean() * X.dot(Y) ) / denominator

# print the intermediate dot product and the results
print(X.dot(X))
print(denominator)
print(a)
print(b)


30
10.0
5.0
-1.0

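As a quick sanity check (not in the original notebook), numpy's own least-squares fit should recover the same coefficients; for degree 1, np.polyfit returns [slope, intercept]:

In [ ]:
# cross-check the hand-computed coefficients against np.polyfit;
# this should print values matching a = 5.0 and b = -1.0
a_check, b_check = np.polyfit(X, Y, 1)
print(a_check, b_check)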
In [38]:
# let's calculate the predicted Y
Yhat = a*X + b

# a is the slope and b is the intercept of the fitted line
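With a = 5.0 and b = -1.0 from the previous cell, the fitted line is yhat = 5x - 1, so Yhat evaluates to [-1, 4, 9, 14, 19].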

In [39]:
# let's plot everything together to make sure it worked
plt.scatter(X, Y)
plt.plot(X, Yhat)
plt.show()

In [44]:
# determine how good the model is by computing the r-squared
d1 = Y - Yhat
d2 = Y - Y.mean()
r2 = 1 - d1.dot(d1) / d2.dot(d2)
print("the error is:", 1 - r2)

# root-mean-squared error of the residuals
rms = np.sqrt((d1**2).mean())
print(rms)


the error is: 0.053030303030302983
1.67332005307
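As a final cross-check (not in the original): for simple linear regression with an intercept, r-squared equals the squared Pearson correlation between X and Y, so np.corrcoef should agree with r2 above:

In [ ]:
# r-squared of a simple linear fit equals the squared correlation
# coefficient; this should match the r2 computed in the previous cell
r = np.corrcoef(X, Y)[0, 1]
print(r**2)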