In [ ]:
In [4]:
# Show the `dtypes` attribute of `adult` as the cell output.
# NOTE(review): `adult` is defined outside this excerpt — presumably a pandas DataFrame; confirm.
adult.dtypes
Out[4]:
In [5]:
# Show the raw values behind `adult.income` as the cell output.
# NOTE(review): presumably `income` is a column of the `adult` frame — confirm against the loading cell.
adult.income.values
Out[5]:
In [6]:
# Regression needs numeric predictors, so select only these columns.
# NOTE(review): "age " carries a trailing space — confirm it matches the
# actual column label in `adult`.
numeric_columns = [
    "age ",
    "fnlwgt",
    "education-num",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
]
X = adult[numeric_columns].values
X
Out[6]:
In [7]:
from sklearn import datasets  # scikit-learn's bundled dataset loaders

# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# accessing it raises ImportError. Try it first so older installs behave
# exactly as before; otherwise rebuild an equivalent Bunch (same `data`,
# `target`, `feature_names` and `DESCR` attributes used by later cells) from
# the original source file, following the recipe in scikit-learn's
# deprecation notice.
try:
    data = datasets.load_boston()
except (ImportError, AttributeError):
    # Imported locally because this cell runs before the notebook's
    # numpy/pandas import cell.
    import numpy as np
    import pandas as pd
    from sklearn.utils import Bunch

    BOSTON_URL = "http://lib.stat.cmu.edu/datasets/boston"
    # The raw file has a 22-line preamble and wraps every record over two
    # physical lines, hence the even/odd row slicing below.
    raw_df = pd.read_csv(BOSTON_URL, sep=r"\s+", skiprows=22, header=None)
    data = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
            "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
        ]),
        DESCR=(
            "Boston house prices dataset, loaded from " + BOSTON_URL + " "
            "because datasets.load_boston is unavailable in this "
            "scikit-learn version. Target is MEDV."
        ),
    )
In [9]:
# Print the description text attached to the loaded dataset object.
print(data.DESCR)
In [10]:
import numpy as np
import pandas as pd
In [13]:
# Predictors: one column per entry in the dataset's feature_names.
df = pd.DataFrame(data=data.data, columns=data.feature_names)

# Response (housing value, MEDV) kept in its own single-column DataFrame.
target = pd.DataFrame(data=data.target, columns=["MEDV"])
In [14]:
## Without a constant
import statsmodels.api as sm

# Response is the MEDV column of `target`; the single predictor is the RM
# column of `df`. No intercept column is added here.
y = target["MEDV"]
X = df["RM"]
In [15]:
# statsmodels takes the response (endog) first and the predictors (exog)
# second — the reverse of scikit-learn's fit(X, y).
model = sm.OLS(endog=y, exog=X).fit()

# In-sample predictions from the fitted model.
predictions = model.predict(exog=X)
In [16]:
# Display the fitted model's summary table as the cell output
# (this is a bare expression, not a print call).
model.summary()
Out[16]:
In [17]:
# Load the mtcars table straight from the Rdatasets GitHub mirror.
MTCARS_URL = (
    "https://raw.githubusercontent.com/vincentarelbundock/"
    "Rdatasets/master/csv/datasets/mtcars.csv"
)
mtcars = pd.read_csv(MTCARS_URL)
In [18]:
# Display the mtcars DataFrame returned by pd.read_csv as the cell output.
mtcars
Out[18]:
In [21]:
# Response is the mpg column; predictors are the disp, wt and qsec columns.
y = mtcars["mpg"]
X = mtcars[["disp", "wt", "qsec"]]
In [22]:
# statsmodels takes the response (endog) first and the predictors (exog)
# second — the reverse of scikit-learn's fit(X, y). X is passed as-is, so no
# constant term is included in this fit.
model = sm.OLS(endog=y, exog=X).fit()

# In-sample predictions from the fitted model.
predictions = model.predict(exog=X)
In [23]:
# Display the fitted model's summary table as the cell output
# (this is a bare expression, not a print call).
model.summary()
Out[23]:
In [25]:
# Display the predictor frame X (the disp, wt and qsec columns of mtcars).
X
Out[25]:
In [26]:
# Display the response y (the mpg column of mtcars).
y
Out[26]:
In [31]:
#http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
In [27]:
# Split the data into training/testing sets: 33% of the rows are held out,
# and the fixed random_state makes the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
In [28]:
# Display the training-set predictors returned by train_test_split.
X_train
Out[28]:
In [29]:
# Display the held-out test-set predictors returned by train_test_split.
X_test
Out[29]:
In [ ]:
In [32]:
# Build an ordinary least squares estimator with scikit-learn's defaults.
regr = linear_model.LinearRegression()

# Fit on the training split only; as the cell's last expression, the fitted
# estimator is also shown as the cell output.
regr.fit(X=X_train, y=y_train)
Out[32]:
In [35]:
# Make predictions using the testing set
y_pred = regr.predict(X_test)

# The fitted coefficients, one per predictor column
print('Coefficients: \n', regr.coef_)

# The mean squared error on the held-out rows
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))

# r2_score is the coefficient of determination (R^2), which is a different
# metric from sklearn.metrics.explained_variance_score, so label it as such.
# 1 is perfect prediction.
print('Coefficient of determination (R^2): %.2f' % r2_score(y_test, y_pred))
In [ ]: