In [1]:
%matplotlib inline
import numpy as np
import scipy.stats as stats
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import seaborn as sns
In [2]:
# Synthetic data for two simple linear models that share the same true line
# (intercept b0, slope b1) and differ only in the noise standard deviation.
SEED = 42
# Seed NumPy's global RNG so that Restart & Run All reproduces the same data.
# Both np.random.choice and stats.norm.rvs (with its default random_state)
# draw from this global state.
np.random.seed(SEED)

b0 = 0.5  # true intercept
b1 = 0.1  # true slope

# 1000 evenly spaced candidate values from 1 to 100 (inclusive)
x_grid = np.linspace(1, 100, 1000)
# draw 100 of them at random, with replacement (duplicates are possible)
x = np.random.choice(x_grid, 100, replace=True)

# zero-mean Gaussian noise: standard deviation 10 for y1, 1 for y2
noise1 = stats.norm.rvs(loc=0.0, scale=10.0, size=len(x))
noise2 = stats.norm.rvs(loc=0.0, scale=1.0, size=len(x))

# same underlying line, different noise levels
y1 = b0 + b1*x + noise1
y2 = b0 + b1*x + noise2

df = pd.DataFrame({'x': x, 'y1': y1, 'y2': y2})
In [3]:
# Scatter + regression fit of the high-noise response y1 against x, with
# marginal distributions. The data variables are passed by keyword and the
# figure size via `height`: seaborn renamed `size` to `height` and deprecated
# positional x/y arguments.
sns.jointplot(x='x', y='y1', data=df, kind='reg', xlim=(-5, 105), color="r", height=7)
Out[3]:
In [4]:
# Ordinary least squares regression of y1 on x: build the model
# specification from the formula, fit it, and display the summary table.
ols_y1 = smf.ols(formula='y1 ~ x', data=df)
model = ols_y1.fit()
model.summary()
Out[4]:
In [5]:
# Scatter + regression fit of the low-noise response y2 against x, with
# marginal distributions. The data variables are passed by keyword and the
# figure size via `height`: seaborn renamed `size` to `height` and deprecated
# positional x/y arguments.
sns.jointplot(x='x', y='y2', data=df, kind='reg', xlim=(-5, 105), height=7)
Out[5]:
In [6]:
# Ordinary least squares regression of y2 on x: build the model
# specification from the formula, fit it, and display the summary table.
ols_y2 = smf.ols(formula='y2 ~ x', data=df)
model = ols_y2.fit()
model.summary()
Out[6]:
In [ ]: