In [224]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
In [225]:
def rmse(act, pred):
    """Return the root-mean-squared error between ``act`` and ``pred``.

    Both inputs are converted to float NumPy arrays before subtracting, so
    plain Python sequences are accepted and pandas objects are compared
    element-by-element by position instead of being aligned on index labels.

    Parameters
    ----------
    act : array-like
        Observed (actual) values.
    pred : array-like
        Predicted values; must be broadcastable against ``act``.

    Returns
    -------
    float
        ``sqrt(mean((act - pred) ** 2))``.
    """
    act = np.asarray(act, dtype=float)
    pred = np.asarray(pred, dtype=float)
    return np.sqrt(((act - pred) ** 2).mean())
In [226]:
# Read the training and test tables from CSV files in the working directory.
train = pd.read_csv(filepath_or_buffer='./regression_train.csv')
test = pd.read_csv(filepath_or_buffer='./regression_test.csv')
In [227]:
# Preview the first rows of the training frame (head() defaults to 5 rows).
train.head()
Out[227]:
In [228]:
# Show the container types of the feature slice (all columns after the first)
# and of the first column, which is used as the target further down.
for selection in (train.iloc[:, 1:], train.iloc[:, 0]):
    print(type(selection))
In [229]:
# Fit an L1-regularised logistic regression: column 0 of the training frame is
# the target, every remaining column is a feature.
# solver='liblinear' is set explicitly because the default solver in
# scikit-learn >= 0.22 ('lbfgs') does not support penalty='l1' and raises a
# ValueError; liblinear was the default when these settings were tuned.
# NOTE(review): LogisticRegression is a classifier, so the predictions are
# class labels taken from the training target, not continuous values.
model = LogisticRegression(
    penalty='l1',
    C=0.08,
    solver='liblinear',
    random_state=1,
    max_iter=200,
).fit(train.iloc[:, 1:], train.iloc[:, 0])

# Predict labels for the test rows using the same feature columns.
result = model.predict(test.iloc[:, 1:])
In [230]:
# Score the test predictions against the targets stored in column 0.
actual = test.iloc[:, 0].values
print(rmse(actual, result))
In [231]:
# Print the array of test-set predictions for a quick visual check.
print(result)
In [232]:
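# RMSE printed by the evaluation cell for different LogisticRegression settings:
# penalty='l1', C=0.5 : 5.49093906269
# penalty='l1', C=1   : 5.49835456334
# C=0.1  (penalty not recorded; presumably 'l1'): 5.46550042007
# C=0.08 (penalty not recorded; presumably 'l1'): 5.4605413578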
In [234]:
# Write the test-set predictions to a single-column CSV (no index column).
test_frame = pd.DataFrame({'lr_testresult': result})
test_frame.to_csv('lr_testresult.csv', index=False)

# Predict on the training features as well and store those the same way.
trainresult = model.predict(train.iloc[:, 1:])
train_frame = pd.DataFrame({'lr_trainresult': trainresult})
train_frame.to_csv('lr_trainresult.csv', index=False)
In [ ]: