In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.pylab as plb
import seaborn
import pandas as pd
from sklearn.linear_model import LinearRegression

In [2]:
# Fix the state of NumPy's global random generator so that the random draws
# made further down the notebook are repeatable on a fresh kernel.
np.random.seed(seed=1234)

In [3]:
from scipy.stats import norm

In [59]:
# Two stacked panels for the frozen normal distribution created by norm() with
# its default parameters: the density on top and the cumulative distribution
# below. The point x = 1 is marked on both panels, and the area under the
# density to the left of x = 1 is shaded.
rv = norm()
# Evaluation grid spanning the 0.1% .. 99.9% quantiles of the distribution.
x = np.linspace(rv.ppf(0.001), rv.ppf(0.999), 100)
yp = rv.pdf(x)
yc = rv.cdf(x)
pdf_at_1 = rv.pdf(1)
cdf_at_1 = rv.cdf(1)

# Explicit axes handles instead of the pyplot "current axes" state machine, so
# every call below states which panel it draws on.
fig, (ax_pdf, ax_cdf) = plt.subplots(2, 1)

# --- upper panel: probability density function ---
ax_pdf.plot(x, yp, 'k-', lw=2, label='frozen pdf')
ax_pdf.plot((1, 1), (0, pdf_at_1))  # vertical marker at x = 1
ax_pdf.set_title("Probability Density Function (for a Normal random variable)")
ax_pdf.annotate("(1 , {0:.2f})".format(pdf_at_1), xy=(1, pdf_at_1), xytext=(1.2, pdf_at_1))
# Boolean mask computed on the array directly rather than element by element.
ax_pdf.fill_between(x, yp, where=(x <= 1))
ax_pdf.set_xlim(-3, 3)

# --- lower panel: cumulative distribution function ---
# Reuses the precomputed yc; the label now names the curve that is drawn here.
ax_cdf.plot(x, yc, 'k-', lw=2, label='frozen cdf')
ax_cdf.plot((1, 1), (0, cdf_at_1))  # vertical marker at x = 1
# The text is shifted down by 0.1 so it does not sit on top of the curve.
ax_cdf.annotate("(1 , {0:.2f})".format(cdf_at_1), xy=(1, cdf_at_1), xytext=(1.2, cdf_at_1 - 0.1))
ax_cdf.set_xlim(-3, 3)
ax_cdf.set_title("Cumulative Distribution Function (for a Normal random variable)")

# Without this the lower panel's title overlaps the upper panel's tick labels.
fig.tight_layout()
# NOTE: the target directory must already exist; savefig does not create it.
fig.savefig("pics/2017/02/18-probability-a-measure-theoretic-approach_pdf-cdf1.png")
plt.show()



In [48]:
# Density of the frozen distribution `rv` at x = 1 -- the height that the
# figure above annotates on the PDF panel. Left as the cell's last expression
# so the notebook displays the value.
rv.pdf(1)


Out[48]:
0.24197072451914337

In [3]:
# Linear-regression estimator configured to fit an intercept term.
# NOTE(review): an identical estimator is instantiated again in the cell that
# calls fit(), so this instance is replaced before it is ever used.
lr = LinearRegression(fit_intercept=True)

In [4]:
# Synthetic regression sample of 100 points.
# x: the grid 0.0, 0.2, ..., 3.8 repeated five times, each value jittered by
#    uniform noise drawn from [-0.1, 0.1).
# y: 5 * x plus Gaussian noise with mean 0 and standard deviation 2.
# The uniform draw is made before the normal draw, so the sequence consumed
# from the global random generator is unchanged.
dats_x_range = np.tile(np.arange(20) * 0.2, 5) + np.random.uniform(low=-0.1, high=0.1, size=100)
dats = [dats_x_range, dats_x_range * 5 + np.random.normal(loc=0.0, scale=2, size=100)]

In [5]:
# Scatter plot of the synthetic sample: dats holds exactly two sequences, the
# x values followed by the y values, so it unpacks straight into scatter().
# The figure is written to disk before being shown.
plt.scatter(*dats)
plt.savefig('pics/2017/02/21-linear-regression_scatterPoints.png')
plt.show()



In [7]:
# Create the estimator and fit it on the synthetic sample. The x values are
# reshaped into a single-feature column (one row per sample), which is the
# 2-D layout fit() expects. The fit call is kept as the cell's last expression
# so the notebook displays the fitted estimator.
lr = LinearRegression(fit_intercept=True)
lr.fit(np.reshape(dats[0], (-1, 1)), dats[1])


Out[7]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [8]:
# Scatter of the synthetic sample with the fitted regression line on top.
plt.scatter(dats[0], dats[1])
# A straight line is fully determined by its end points, so evaluate the model
# only at the smallest and largest x instead of tracing all 100 unsorted
# samples back and forth. Using predict() also avoids re-implementing the
# model by hand from intercept_ and the one-element coef_ array.
line_x = np.array([np.min(dats[0]), np.max(dats[0])])
line_y = lr.predict(line_x.reshape(-1, 1))  # predict() expects a 2-D (n_samples, 1) input
plt.plot(line_x, line_y, c='r')
# NOTE: the target directory must already exist; savefig does not create it.
plb.savefig('pics/2017/02/21-linear-regression_scatterPoints_withLine.png')
plt.show()



In [ ]: