In [13]:
import pandas as pd
data = [1,2,3,4,5]
df = pd.DataFrame(data)
print(df.to_csv(index=False,header=False,line_terminator=','))


1,2,3,4,5,

In [20]:
%pylab inline
import pylab as pl
import numpy as np
#from sklearn import datasets, linear_model
import pandas as pd
import statsmodels.api as sm

# import the cleaned up dataset
df = pd.read_csv('../datasets/loanf.csv')

intrate = df['Interest.Rate']
loanamt = df['Loan.Amount']
fico = df['FICO.Score']


# reshape the data from a pandas Series to columns 
# the dependent variable
y = np.matrix(intrate).transpose()
# the independent variables shaped as columns
x1 = np.matrix(fico).transpose()
x2 = np.matrix(loanamt).transpose()

# put the two columns together to create an input matrix 
# if we had n independent variables we would have n columns here
x = np.column_stack([x1,x2])

# create a linear model and fit it to the data
X = sm.add_constant(x)
model = sm.OLS(y,X)
f = model.fit()

print('Coefficients: ', f.params[0:2])
print('Intercept: ', f.params[2])
print('P-Values: ', f.pvalues)
print('R-Squared: ', f.rsquared)


Populating the interactive namespace from numpy and matplotlib
Coefficients:  [ 72.88279832  -0.08844242]
Intercept:  0.000210747768548
P-Values:  [  0.00000000e+000   0.00000000e+000   5.96972978e-203]
R-Squared:  0.656632624649

In [ ]: