In [38]:
import numpy as np
import pandas as pd
import random 

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

In [39]:
# Load the lunch menu data. The `result` column is the label:
# 1 = satisfied, 0 = dissatisfied.
df = pd.read_csv('lunch_menu.csv')
# Show the last few rows as a quick check that the file loaded.
df.tail()


Out[39]:
weather week temperate menu result
203 2 한식 0
204 1 냉면 0
205 2 칼국수 1
206 3 햄버거 1
207 1 한식 1

In [40]:
# Select the model inputs and the target column.
feature_names = ['temperate', 'weather', 'menu']
target_name = ["result"]

# Keep only the columns of interest, drop rows with any missing value, and
# renumber the rows afterwards. Resetting the index *after* dropna matters:
# later cells build boolean masks from dfX and apply them to df, which only
# works when both frames share the same index. (Previously df kept its gapped
# index while dfX was renumbered, so the two diverged as soon as a row was
# dropped.)
df = (
    df[feature_names + target_name]
    .dropna(axis=0, how="any")
    .reset_index(drop=True)
)

# Split features and target. dfX is an independent copy because the encoding
# cells below modify it; df itself keeps the original string categories.
dfX = df[feature_names].copy()
dfy = df[target_name]

In [41]:
# LabelEncoder
# Turn each categorical column of dfX into integer codes (one fresh encoder
# per column, so the codes of different columns are independent).
label_list = ['weather', 'menu']
for label in label_list:
    encoder = LabelEncoder()
    encoded_codes = encoder.fit_transform(dfX[label])
    dfX[label] = encoded_codes

In [42]:
# OneHotEncoding
# Replace each integer-coded categorical column of dfX with one 0/1 indicator
# column per category, named "<column>_<code>" (e.g. weather_0, weather_1, ...).
# NOTE: this cell consumes the original column, so it can only be run once
# per fresh dfX.
onehot_list = ['weather', 'menu']
for onehot in onehot_list:
    # One indicator column per distinct category seen in the raw data.
    count = len(df[onehot].unique())
    column_list = [onehot + "_" + str(idx) for idx in range(count)]
    # Series.as_matrix() was removed in pandas 1.0; `.values` works on every
    # pandas version. OneHotEncoder expects a 2-D (n_samples, 1) array.
    codes = dfX[onehot].values.reshape(-1, 1)
    dfX2 = pd.DataFrame(OneHotEncoder().fit_transform(codes).toarray(),
            columns=column_list, index=dfX.index)
    dfX = pd.concat([dfX, dfX2], axis=1)
    # Drop the integer-coded column now that its indicators are in place.
    del dfX[onehot]

In [43]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dfX, dfy, test_size=0.3, random_state=1
)

# Entropy-based decision tree, limited in depth and leaf size.
model = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_leaf=3,
)
model = model.fit(X_train, y_train)

In [44]:
# Evaluate on the held-out split: predict once, then report accuracy, the
# confusion matrix, and the per-class precision/recall table.
y_pred = model.predict(X_test)
print("accuracy score : {}".format(accuracy_score(y_test, y_pred)))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


accuracy score : 0.7301587301587301
[[21 14]
 [ 3 25]]
             precision    recall  f1-score   support

          0       0.88      0.60      0.71        35
          1       0.64      0.89      0.75        28

avg / total       0.77      0.73      0.73        63


In [45]:
def inputEncoding(data):
    """Convert one raw sample into the flat feature list the model expects.

    `data` holds one value per entry of `feature_names`, in the same order.
    A feature listed in `label_list` is expanded into one 0/1 flag per known
    category (in the order returned by `columKeywords`); any other feature is
    passed through unchanged. A categorical value that matches no known
    category yields all zeros for that feature.

    Returns a plain list.
    """
    categories = columKeywords(label_list)
    encoded = []
    for position, feature in enumerate(feature_names):
        if feature not in categories:
            # Not a categorical feature: keep the raw value.
            encoded.append(data[position])
            continue
        encoded.extend(
            1 if known == data[position] else 0
            for known in categories[feature]
        )
    return encoded

def getKeywords(keyword):
    """List the original category values of column `keyword`, ordered by
    their one-hot column number.

    For each indicator column "<keyword>_<i>" in dfX, the rows where it equals
    1 are looked up in df and the first distinct raw value found there is taken
    as category i. Relies on df and dfX describing the same rows.
    """
    n_categories = len(df[keyword].unique())
    return [
        df[dfX[keyword + "_" + str(i)] == 1][keyword].unique()[0]
        for i in range(n_categories)
    ]

def columKeywords(label_list):
    """Map each column name in `label_list` to its ordered category values,
    as produced by `getKeywords`."""
    return {label: getKeywords(label) for label in label_list}

In [46]:
# Display text for each predicted class label (1: satisfied, 0: dissatisfied).
result_keywords = {1:'만족', 0:'불만족'}
def pridict(data):
    """Print and return the model's class probabilities for one encoded sample.

    `data` is a flat, already-encoded feature list (see `inputEncoding`). It is
    reshaped to a single-row 2-D array before being passed to the model.

    Prints the two probabilities and the predicted label text, then returns
    the tuple (probability at index 0, probability at index 1).
    NOTE(review): index 0 is reported as "dissatisfied" and index 1 as
    "satisfied", which assumes model.classes_ is [0, 1] - confirm.
    """
    sample = np.array(data).reshape(1, -1)
    # Run inference once and reuse the result instead of calling
    # predict_proba again for every print and for the return value.
    proba = model.predict_proba(sample)[0]
    predicted_label = model.predict(sample)[0]
    print("불만족할 확률 : {}".format(proba[0]))
    print("만족할 확률 : {}".format(proba[1]))
    print("결론 : {}".format(result_keywords[predicted_label]))
    return (proba[0], proba[1])

In [47]:
# Accepted weather values: 맑음 (sunny), 흐림 (cloudy), 비 (rain), 눈 (snow)
# Accepted menu values: 냉면 (cold noodles), 칼국수 (noodle soup), 햄버거 (hamburger), 한식 (Korean food)
# Sample order follows feature_names: [temperate, weather, menu].
data = [10, '맑음', '한식']
pridict(inputEncoding(data))


불만족할 확률 : 0.23809523809523808
만족할 확률 : 0.7619047619047619
결론 : 만족
Out[47]:
(0.23809523809523808, 0.76190476190476186)

In [62]:
# choice menu
def choiceMenu(data, choice):
    """Try every observed value of one categorical feature and print the best.

    `data` is a raw sample ordered like `feature_names`; the entry for `choice`
    is a placeholder. `choice` is the feature to vary ('menu' or 'weather').
    Each distinct value of df[choice] is substituted in turn, scored with
    `pridict`, and the value(s) with the highest satisfaction probability are
    reported. Exact ties are joined with ", ".

    Output is printed; nothing is returned. The caller's `data` is left
    untouched.
    """
    choice_list = df[choice].unique()
    slot = feature_names.index(choice)
    # Work on a copy so the caller's list is not overwritten with candidates.
    trial = list(data)
    best = ""
    best_prob = -1  # below any real probability, so the first candidate wins
    for choice_str in choice_list:
        print(choice_str)
        trial[slot] = choice_str
        prob = pridict(inputEncoding(trial))
        if best_prob < prob[1]:
            best = choice_str
            best_prob = prob[1]
        elif best_prob == prob[1]:
            # Same satisfaction probability as the current best: list both.
            best += ", "+choice_str
        print()
    if choice == 'menu':
        print("{} 드세요.".format(best))
    if choice == 'weather':
        # With zero satisfaction probability for every weather, advise
        # against the dish altogether.
        if best_prob == 0:
            print("먹지 마세요.")
        else:
            print("{}날 드세요.".format(best))

In [63]:
# Recommend a menu for temperate=35 and cloudy (흐림) weather. The empty menu
# entry is only a placeholder; choiceMenu tries each known menu in its place.
data = [35, '흐림', '']
choiceMenu(data, 'menu')


냉면
불만족할 확률 : 0.0
만족할 확률 : 1.0
결론 : 만족

칼국수
불만족할 확률 : 1.0
만족할 확률 : 0.0
결론 : 불만족

햄버거
불만족할 확률 : 0.3333333333333333
만족할 확률 : 0.6666666666666666
결론 : 만족

한식
불만족할 확률 : 0.8333333333333334
만족할 확률 : 0.16666666666666666
결론 : 불만족

냉면 드세요.

In [73]:
# Find the weather in which a hamburger (햄버거) at temperate=30 scores best.
# The empty weather entry is only a placeholder; choiceMenu tries each known
# weather value in its place.
data = [30, '', '햄버거']
choiceMenu(data, 'weather')


맑음
불만족할 확률 : 0.25
만족할 확률 : 0.75
결론 : 만족

흐림
불만족할 확률 : 0.3333333333333333
만족할 확률 : 0.6666666666666666
결론 : 만족

비
불만족할 확률 : 0.3333333333333333
만족할 확률 : 0.6666666666666666
결론 : 만족

눈
불만족할 확률 : 0.3333333333333333
만족할 확률 : 0.6666666666666666
결론 : 만족

맑음날 드세요.

In [ ]: