In [224]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

In [225]:
def rmse(act, pred):
    """Return the root-mean-square error between actual and predicted values.

    Both inputs are converted to float NumPy arrays before subtracting, so
    plain lists/tuples are accepted, unsigned or narrow integer dtypes cannot
    wrap around, and pandas objects are compared element-by-element by
    position instead of being re-aligned on their index labels.

    Parameters
    ----------
    act : array-like
        Observed target values.
    pred : array-like
        Predicted values; must be broadcast-compatible with ``act``.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((act - pred) ** 2))``.
    """
    act = np.asarray(act, dtype=float)
    pred = np.asarray(pred, dtype=float)
    return np.sqrt(((act - pred) ** 2).mean())

In [226]:
# Read the train and test splits from CSV files addressed relative to the
# current working directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('./regression_train.csv', './regression_test.csv')
)

In [227]:
# Preview the first five rows of the training frame.
train.head(n=5)


Out[227]:
y 1 2 3 4 5 6 7 8 9 10 11 12
0 90 6 2 1036 103 114 1.00 1.00 172076 355965 2.0 6527 1851864
1 88 1 0 2165 205 101 0.40 1.20 43107 44139 3.0 130 1131931
2 85 62 77 3806 258 166 1.40 1.40 492142 268706 5.2 256 1314590
3 81 5 0 4721 256 177 0.99 2.58 524787 174964 1.0 233 972606
4 79 42 55 3949 249 244 2.60 4.60 197289 529200 3.4 331 1013805

In [228]:
# Report the type of each slice used later: every column after the first,
# then the first column on its own.
for piece in (train.iloc[:, 1:], train.iloc[:, 0]):
    print(type(piece))


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>

In [229]:
# Column 0 is used as the target; every remaining column is used as a feature.
X_train, y_train = train.iloc[:, 1:], train.iloc[:, 0]
X_test = test.iloc[:, 1:]

# The solver is spelled out because an L1 penalty is only supported by the
# 'liblinear' and 'saga' solvers; relying on the library default breaks on
# scikit-learn releases whose default solver is 'lbfgs'.
# NOTE(review): the target appears to take many distinct integer values, so
# this is a multiclass fit — confirm that the installed scikit-learn version
# still accepts liblinear for multiclass problems.
model = LogisticRegression(
    penalty='l1',
    C=0.08,
    solver='liblinear',
    random_state=1,
    max_iter=200,
).fit(X_train, y_train)

# Predicted labels for the test features.
result = model.predict(X_test)

In [230]:
# Score the test predictions against the first column of `test`.
actual = test.iloc[:, 0].values
test_rmse = rmse(actual, result)
print(test_rmse)


5.45819911521

In [231]:
# Show the labels the model predicted for the test features.
print(result)


[80 83 84 ..., 91 98 82]

In [232]:
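# Scores recorded from earlier runs with different settings
# (presumably test RMSE, lower is better — TODO confirm):
# penalty='l1', C=0.5: 5.49093906269
# penalty='l1', C=1  : 5.49835456334
# C=0.1: 5.46550042007
# C=0.08: 5.4605413578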

In [234]:
# Predict on the training features as well, then write each prediction vector
# to its own single-column CSV (no index column).
trainresult = model.predict(train.iloc[:, 1:])
exports = (
    ('lr_testresult.csv', 'lr_testresult', result),
    ('lr_trainresult.csv', 'lr_trainresult', trainresult),
)
for csv_name, column, predictions in exports:
    tmp = pd.DataFrame({column: predictions})
    tmp.to_csv(csv_name, index=False)

In [ ]: