notebook.community

Edit and run



In [38]:

    
import numpy as np
import pandas as pd
import random 

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder



In [39]:

    
# Load the lunch survey data. The `result` column is the label
# (1: satisfied, 0: dissatisfied).
df = pd.read_csv('lunch_menu.csv')
df.tail()  # last expression of the cell: displays the final rows



In [40]:

    
# Select the model inputs and the label column.
feature_names = ['temperate', 'weather', 'menu']
target_name = ["result"]

# Drop incomplete rows *before* renumbering the index, so that `df`, `dfX`
# and `dfy` all share one contiguous 0..n-1 index. Later cells filter `df`
# with boolean masks built from `dfX`, which only works when both frames
# line up row for row. Chaining (instead of inplace=True) also keeps the
# statement a single, readable expression.
df = (
    df[feature_names + target_name]
    .dropna(axis=0, how="any")
    .reset_index(drop=True)
)

# Split features and target. `dfX` is an independent copy because the
# encoding cells below overwrite and replace its columns.
dfX = df[feature_names].copy()
dfy = df[target_name]



In [41]:

    
# LabelEncoder: replace each categorical string column of dfX with integer
# codes. The fitted encoders are discarded, so the code -> string mapping is
# not stored anywhere; getKeywords() recovers it later by looking at the data.
label_list = ['weather', 'menu']
for label in label_list:
    dfX[label] = LabelEncoder().fit_transform(dfX[label])



In [42]:

    
# OneHotEncoding: expand every integer-coded categorical column into one 0/1
# indicator column per category, named "<column>_<code>".
onehot_list = ['weather', 'menu']
for onehot in onehot_list:
    # One indicator column per distinct value of the original string column.
    count = len(df[onehot].unique())
    column_list = [onehot + "_" + str(idx) for idx in range(count)]
    # `Series.as_matrix()` was removed in pandas 1.0; `.values` returns the
    # same ndarray on old and new pandas alike. [:, np.newaxis] turns it into
    # the 2-D (n_samples, 1) array that OneHotEncoder requires.
    codes = dfX[onehot].values[:, np.newaxis]
    dfX2 = pd.DataFrame(OneHotEncoder().fit_transform(codes).toarray(),
            columns=column_list, index=dfX.index)
    # Append the indicators at the end and drop the integer-coded column. The
    # resulting column order (temperate, weather_*, menu_*) is the layout that
    # inputEncoding() rebuilds for a single sample.
    dfX = pd.concat([dfX, dfX2], axis=1)
    del dfX[onehot]



In [43]:

    
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(dfX, dfy, test_size=0.3, random_state=1)

# Shallow entropy-based tree. scikit-learn permutes the candidate features at
# every split, so ties between equally good splits can be broken differently
# on each fit unless random_state is fixed. Seed the tree as well, so that a
# Restart & Run All produces the same model and the same reported numbers.
model = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_leaf=3,
    random_state=1,
).fit(X_train, y_train)



In [44]:

    
# Evaluate on the held-out split. The predictions are computed once and
# shared by all three reports.
y_pred = model.predict(X_test)
print("accuracy score : {}".format(accuracy_score(y_test, y_pred)))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))









    



accuracy score : 0.7301587301587301
[[21 14]
 [ 3 25]]
             precision    recall  f1-score   support

          0       0.88      0.60      0.71        35
          1       0.64      0.89      0.75        28

avg / total       0.77      0.73      0.73        63



In [45]:

    
def inputEncoding(data):
    """Encode one raw sample into the flat feature vector the model was fit on.

    `data` holds one value per entry of `feature_names`, in that order.
    A feature that has a keyword list (see `columKeywords`) is expanded into
    one 0/1 indicator per keyword, with 1 where the keyword equals the
    supplied value; a value matching no keyword therefore yields all zeros.
    Every other feature is passed through unchanged.

    Returns the encoded values as a list.
    """
    keyword_map = columKeywords(label_list)
    encoded = []
    for position, feature in enumerate(feature_names):
        if feature not in keyword_map:
            # Not a categorical feature: keep the raw value as-is.
            encoded.append(data[position])
            continue
        encoded.extend(
            1 if keyword == data[position] else 0
            for keyword in keyword_map[feature]
        )
    return encoded

def getKeywords(keyword):
    """Return the original values of column `keyword`, ordered by one-hot index.

    Entry i of the returned list is the value of df[keyword] found on the rows
    where the indicator column "<keyword>_<i>" of dfX equals 1.
    """
    count = len(df[keyword].unique())
    keyword_list = []
    for idx in range(count):
        column = keyword + "_" + str(idx)
        # Select rows by position, not by index label: dfX is renumbered from
        # 0 while df can keep the gaps left by dropna(), and a label-aligned
        # boolean Series would then fail to align with df's index.
        mask = (dfX[column] == 1).values
        keyword_list.append(df.loc[mask, keyword].unique()[0])
    return keyword_list

def columKeywords(label_list):
    """Map every column name in `label_list` to its getKeywords() list."""
    return {label: getKeywords(label) for label in label_list}



In [46]:

    
# Display text for each predicted class label (1: satisfied, 0: dissatisfied).
result_keywords = {1:'만족', 0:'불만족'}
def pridict(data):
    """Print the model's verdict for one encoded sample and return its probabilities.

    `data` is a flat, already-encoded feature vector (see `inputEncoding`);
    it is reshaped into a single-row 2-D array before being passed to the
    model.

    Returns a tuple of the first and second entries of predict_proba for that
    row, printed as the dissatisfied / satisfied probabilities.
    NOTE(review): this assumes the model's class order is [0, 1] -- confirm.
    """
    sample = np.array(data).reshape(1, -1)
    probabilities = model.predict_proba(sample)[0]
    print("불만족할 확률 : {}".format(probabilities[0]))
    print("만족할 확률 : {}".format(probabilities[1]))
    predicted_label = model.predict(sample)[0]
    print("결론 : {}".format(result_keywords[predicted_label]))
    return (probabilities[0], probabilities[1])



In [47]:

    
# Category values as written in this notebook; inputEncoding() compares them
# with ==, so they must be spelled exactly like this:
# weather : 맑음 (sunny), 흐림 (cloudy), 비 (rain), 눈 (snow)
# menu : 냉면 (cold noodles), 칼국수 (noodle soup), 햄버거 (hamburger), 한식 (Korean food)
# The sample follows the order of feature_names: [temperate, weather, menu].
data = [10, '맑음', '한식']
pridict(inputEncoding(data))









    



불만족할 확률 : 0.23809523809523808
만족할 확률 : 0.7619047619047619
결론 : 만족






    Out[47]:





(0.23809523809523808, 0.76190476190476186)



In [62]:

    
# choice menu
def choiceMenu(data, choice):
    """Try every known value of one feature and print the most promising one(s).

    Parameters
    ----------
    data : list
        Raw sample ordered like `feature_names`. The entry for `choice` is a
        placeholder whose value is ignored. The list is not modified.
    choice : str
        Name of the feature to vary. A recommendation line is printed only
        for 'menu' and 'weather'.

    For each distinct value of df[choice] the value is placed into a copy of
    `data`, the sample is scored with pridict(), and the value(s) with the
    highest second probability (printed by pridict as the satisfied
    probability) are kept; ties are joined with ", ". Returns None.
    """
    choice_list = df[choice].unique()
    slot = feature_names.index(choice)
    # Work on a copy so the caller's list keeps its placeholder instead of
    # being left holding the last candidate that was tried.
    candidate = list(data)
    best_choices = []
    best_prob = -1
    for choice_str in choice_list:
        print(choice_str)
        candidate[slot] = choice_str
        prob = pridict(inputEncoding(candidate))
        if best_prob < prob[1]:
            # Strictly better: restart the list of winners.
            best_choices = [choice_str]
            best_prob = prob[1]
        elif best_prob == prob[1]:
            # Exact tie with the current best: recommend both.
            best_choices.append(choice_str)
        print()
    # str() keeps the join safe even when the candidate values are not strings.
    menu = ", ".join(str(item) for item in best_choices)
    if choice == 'menu':
        print("{} 드세요.".format(menu))
    if choice == 'weather':
        if best_prob == 0:
            # No weather gives any chance of satisfaction.
            print("먹지 마세요.")
        else:
            print("{}날 드세요.".format(menu))



In [63]:

    
# temperate 35, weather 흐림 (cloudy); the menu slot is a placeholder that
# choiceMenu fills with every known menu in turn.
data = [35, '흐림', '']
choiceMenu(data, 'menu')









    



냉면
불만족할 확률 : 0.0
만족할 확률 : 1.0
결론 : 만족

칼국수
불만족할 확률 : 1.0
만족할 확률 : 0.0
결론 : 불만족

햄버거
불만족할 확률 : 0.3333333333333333
만족할 확률 : 0.6666666666666666
결론 : 만족

한식
불만족할 확률 : 0.8333333333333334
만족할 확률 : 0.16666666666666666
결론 : 불만족

냉면 드세요.



In [73]:

    
# temperate 30, menu 햄버거 (hamburger); the weather slot is a placeholder that
# choiceMenu fills with every known weather value in turn.
data = [30, '', '햄버거']
choiceMenu(data, 'weather')









    



맑음
불만족할 확률 : 0.25
만족할 확률 : 0.75
결론 : 만족

흐림
불만족할 확률 : 0.3333333333333333
만족할 확률 : 0.6666666666666666
결론 : 만족

비
불만족할 확률 : 0.3333333333333333
만족할 확률 : 0.6666666666666666
결론 : 만족

눈
불만족할 확률 : 0.3333333333333333
만족할 확률 : 0.6666666666666666
결론 : 만족

맑음날 드세요.



In [ ]:

	weather	week	temperate	menu	result
203	눈	금	2	한식	0
204	눈	월	1	냉면	0
205	눈	화	2	칼국수	1
206	눈	수	3	햄버거	1
207	눈	목	1	한식	1