In [1]:
import pandas as pd
import statsmodels.api as sm
# Normal response variable
stackloss_conversion = sm.datasets.get_rdataset("stackloss", "datasets")
#print (stackloss_conversion.__doc__)
# Lognormal response variable
engel_food = sm.datasets.engel.load_pandas()
#print (engel_food.data)
# Binary response variable
titanic_survival = sm.datasets.get_rdataset("Titanic", "datasets")
#print (titanic_survival.__doc__)
# Continuous 0-1 response variable
duncan_prestige = sm.datasets.get_rdataset("Duncan", "car")
#print (duncan_prestige.__doc__)
# Categorical response variable
iris_flowers = sm.datasets.get_rdataset("iris")
#print (iris_flowers.__doc__)
In [4]:
# Showing plots inline, rather than in a new window
%matplotlib inline
# Modules
from pymc3 import *
import numpy as np
from ggplot import *
# Generating data
size = 200
true_intercept = 1
true_slope = 2
x = np.linspace(0, 1, size)
# y = a + b*x
true_regression_line = true_intercept + true_slope * x
# add noise
y = true_regression_line + np.random.normal(scale=.5, size=size)
# Plotting data
sim_data = pd.DataFrame({"x" : x, "y" : y})
sim_plot = ggplot(sim_data, aes(x="x", y="y")) + geom_point() +\
geom_abline(intercept=true_intercept, slope=true_slope)
print(sim_plot)