In [13]:
import pandas as pd
# Serialise a one-column DataFrame as a single comma-separated line.
data = [1, 2, 3, 4, 5]
df = pd.DataFrame(data)
# Using ',' as the row terminator puts every row on the same line, each value
# followed by a comma (so the result ends with a trailing comma).
# NOTE: the keyword is `lineterminator`; the older spelling `line_terminator`
# was deprecated in pandas 1.5 and removed in pandas 2.0, where it raises
# TypeError. This cell therefore needs pandas >= 1.5.
csv_row = df.to_csv(index=False, header=False, lineterminator=',')
print(csv_row)
In [20]:
%pylab inline
import pylab as pl
import numpy as np
#from sklearn import datasets, linear_model
import pandas as pd
import statsmodels.api as sm
# Multiple linear regression: Interest.Rate ~ FICO.Score + Loan.Amount (OLS).

# import the cleaned up dataset
df = pd.read_csv('../datasets/loanf.csv')
intrate = df['Interest.Rate']
loanamt = df['Loan.Amount']
fico = df['FICO.Score']

# reshape each pandas Series into an (n, 1) column vector.
# Plain ndarrays are used instead of np.matrix, which NumPy no longer
# recommends.
# the dependent variable
y = np.asarray(intrate).reshape(-1, 1)
# the independent variables shaped as columns
x1 = np.asarray(fico).reshape(-1, 1)
x2 = np.asarray(loanamt).reshape(-1, 1)

# put the two columns together to create an input matrix
# if we had n independent variables we would have n columns here
x = np.column_stack([x1, x2])

# create a linear model and fit it to the data.
# prepend=False appends the constant as the LAST column, so the fitted
# parameters are ordered [FICO.Score, Loan.Amount, const]. The default
# (prepend=True in current statsmodels) would put the intercept at index 0 and
# make the labels printed below wrong.
X = sm.add_constant(x, prepend=False)
model = sm.OLS(y, X)
f = model.fit()

# params/pvalues follow the column order of X: two slopes, then the intercept.
print('Coefficients: ', f.params[0:2])
print('Intercept: ', f.params[2])
print('P-Values: ', f.pvalues)
print('R-Squared: ', f.rsquared)
In [ ]: