In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
In [2]:
# Load the data set from the CSV file and preview its first five rows.
data = pd.read_csv('data/tab26.csv')
data.head(5)
Out[2]:
In [3]:
# Explanatory variables: columns x1-x4, passed through sm.add_constant
# so the regression includes an intercept term.
X = sm.add_constant(data[['x1', 'x2', 'x3', 'x4']])
# Dependent (response) variable.
Y = data['y']
In [4]:
# Model 1: ordinary least squares of Y on the current design matrix X,
# followed by the fitted model's summary table.
model1 = sm.OLS(endog=Y, exog=X)
results1 = model1.fit()
results1.summary()
Out[4]:
In [5]:
# Model 2 design matrix: x1-x3 passed through sm.add_constant (x4 dropped).
# NOTE: this rebinds X, so earlier model cells must not be re-run after it.
X = sm.add_constant(data[['x1', 'x2', 'x3']])
# Fit by ordinary least squares and display the summary table.
model2 = sm.OLS(endog=Y, exog=X)
results2 = model2.fit()
results2.summary()
Out[5]:
In [6]:
# Model 3 design matrix: x1 and x2 passed through sm.add_constant.
# NOTE: this rebinds X, so earlier model cells must not be re-run after it.
X = sm.add_constant(data[['x1', 'x2']])
# Fit by ordinary least squares and display the summary table.
model3 = sm.OLS(endog=Y, exog=X)
results3 = model3.fit()
results3.summary()
Out[6]:
In [7]:
# Model 4 design matrix: x1 only, passed through sm.add_constant.
# NOTE: this rebinds X, so earlier model cells must not be re-run after it.
X = sm.add_constant(data[['x1']])
# Fit by ordinary least squares and display the summary table.
model4 = sm.OLS(endog=Y, exog=X)
results4 = model4.fit()
results4.summary()
Out[7]:
In [8]:
# Model selection: gather the AIC and BIC of the four fitted models into one
# table (one row per model) so they can be compared side by side.
# For both criteria, a smaller value indicates the preferred model.
criteria = pd.DataFrame(
    {
        "AIC": [results1.aic, results2.aic, results3.aic, results4.aic],
        "BIC": [results1.bic, results2.bic, results3.bic, results4.bic],
    },
    index=['results1', 'results2', 'results3', 'results4'],
)
criteria
Out[8]: