In [38]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
In [39]:
# Load the lunch survey data from lunch_menu.csv.
# Target encoding (per the original author's note): 1 = satisfied, 0 = dissatisfied.
df = pd.read_csv('lunch_menu.csv')
# Show the last rows as a quick sanity check of the loaded frame.
df.tail()
Out[39]:
In [40]:
# Select the modelling columns: three features and the single target column.
feature_names = ['temperate', 'weather', 'menu']
target_name = ["result"]

# Drop rows with any missing value FIRST and only then reset the index, so that
# df, dfX and dfy all share the same contiguous 0..n-1 index. Resetting before
# the drop left df/dfy with gaps while dfX was renumbered, which breaks any
# later label-aligned operation between dfX and df (e.g. boolean masks).
df = (
    df[feature_names + target_name]
    .dropna(axis=0, how="any")
    .reset_index(drop=True)
)

# Split features and target. dfX is an independent copy because later cells
# overwrite and replace its columns; dfy stays a one-column DataFrame.
dfX = df[feature_names].copy()
dfy = df[target_name]
In [41]:
# Replace each categorical text column in dfX with integer codes.
# A fresh LabelEncoder is fitted per column, so codes are independent per feature.
label_list = ['weather', 'menu']
for label in label_list:
    encoded_codes = LabelEncoder().fit_transform(dfX[label])
    dfX[label] = encoded_codes
In [42]:
# One-hot encode each categorical column of dfX.
# For every feature the integer-coded column is expanded into indicator columns
# named "<feature>_<idx>" and the coded column itself is removed.
# Assumes dfX[onehot] already holds the integer codes produced by the
# LabelEncoder cell, so indicator column idx corresponds to code idx.
onehot_list = ['weather', 'menu']
for onehot in onehot_list:
    # One indicator column per distinct category in the cleaned source frame.
    count = len(df[onehot].unique())
    column_list = [onehot + "_" + str(idx) for idx in range(count)]

    # Series.as_matrix() was removed in pandas 1.0; .values works on every
    # pandas version. OneHotEncoder needs a 2-D (n_samples, 1) array.
    codes = dfX[onehot].values.reshape(-1, 1)
    dfX2 = pd.DataFrame(
        OneHotEncoder().fit_transform(codes).toarray(),
        columns=column_list,
        index=dfX.index,
    )

    # Append the indicators, then drop the original coded column.
    dfX = pd.concat([dfX, dfX2], axis=1)
    del dfX[onehot]
In [43]:
# Hold out 30% of the rows for evaluation; the split is seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(dfX, dfy, test_size=0.3, random_state=1)

# Entropy-based decision tree, limited to depth 5 with at least 3 samples per leaf.
# random_state is fixed as well: scikit-learn permutes features at each split, so
# without a seed, ties between equally good splits can yield a different tree on
# every run even though the train/test split above is deterministic.
model = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_leaf=3,
    random_state=1,
).fit(X_train, y_train)
In [44]:
# Evaluate on the held-out split: predict once and reuse the predictions
# for the accuracy, the confusion matrix and the per-class report.
y_pred = model.predict(X_test)
print("accuracy score : {}".format(accuracy_score(y_test, y_pred)))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
In [45]:
def inputEncoding(data):
    """Turn one raw input row into the flat numeric vector the model expects.

    ``data`` is indexed positionally in the order of the global ``feature_names``.
    Features that have an entry in ``columKeywords(label_list)`` are expanded into
    one 0/1 indicator per known keyword (1 where the keyword equals the raw
    value); every other feature is passed through unchanged. A raw value that
    matches no keyword therefore yields all zeros for that feature.

    Returns:
        list: the encoded values, in feature order.
    """
    keyword_map = columKeywords(label_list)
    encoded = []
    for position, name in enumerate(feature_names):
        if name not in keyword_map:
            # Non-categorical feature: keep the raw value as-is.
            encoded.append(data[position])
            continue
        # Categorical feature: one indicator per known keyword, in keyword order.
        encoded.extend(
            1 if candidate == data[position] else 0
            for candidate in keyword_map[name]
        )
    return encoded
def getKeywords(keyword):
    """Recover the original category value behind each one-hot column of a feature.

    For every indicator column ``"<keyword>_<idx>"`` in the global ``dfX``, looks
    up the rows where that indicator is 1 and reads the matching raw value from
    the global ``df[keyword]``.

    Args:
        keyword: name of a categorical feature column present in ``df``.

    Returns:
        list: raw category values, where position ``idx`` corresponds to the
        indicator column ``"<keyword>_<idx>"``.
    """
    count = len(df[keyword].unique())
    keyword_list = []
    for idx in range(count):
        column = keyword + "_" + str(idx)
        # dfX was built from df row-for-row but with a reset index, so the two
        # frames correspond by POSITION, not necessarily by index label (df keeps
        # gaps after dropna). Converting the mask to a plain array applies it
        # positionally instead of relying on label alignment, which would raise
        # or select the wrong rows when the indexes differ.
        mask = (dfX[column] == 1).values
        keyword_list.append(df.loc[mask, keyword].unique()[0])
    return keyword_list
def columKeywords(label_list):
    """Map every label in ``label_list`` to its keyword list from ``getKeywords``.

    Returns:
        dict: ``{label: getKeywords(label)}`` for each label, in input order.
    """
    return {label: getKeywords(label) for label in label_list}
In [46]:
# Display text for each predicted class label (1 and 0).
result_keywords = {1:'만족', 0:'불만족'}
def pridict(data):
    """Predict the outcome for one already-encoded sample and print a summary.

    Args:
        data: flat sequence of encoded feature values (e.g. the output of
            ``inputEncoding``); it is reshaped to a single-row 2-D array.

    Returns:
        tuple: ``(p0, p1)`` — the first and second entries of the model's
        ``predict_proba`` row for this sample.
    """
    predict_result = np.array(data).reshape(1, -1)
    # Run the probability inference once and reuse it; the original evaluated
    # predict_proba four times on the same row.
    # NOTE(review): index 0 / 1 are reported as the class-0 / class-1
    # probabilities, which assumes model.classes_ == [0, 1] — confirm.
    proba = model.predict_proba(predict_result)[0]
    print("불만족할 확률 : {}".format(proba[0]))
    print("만족할 확률 : {}".format(proba[1]))
    print("결론 : {}".format(result_keywords[model.predict(predict_result)[0]]))
    return (proba[0], proba[1])
In [47]:
# Valid weather values: 맑음 (sunny), 흐림 (cloudy), 비 (rain), 눈 (snow)
# Valid menu values: 냉면 (cold noodles), 칼국수 (knife-cut noodle soup), 햄버거 (hamburger), 한식 (Korean food)
# Input order follows feature_names: [temperate, weather, menu].
data = [10, '맑음', '한식']
pridict(inputEncoding(data))
Out[47]:
In [62]:
# Pick the best value for one feature given the others.
def choiceMenu(data, choice):
    """Try every known value of one feature and print the most promising one(s).

    For each distinct value of ``df[choice]``, the value is placed into the
    ``choice`` slot of the input row, the row is encoded and scored with
    ``pridict``, and the value(s) with the highest second probability
    (``pridict(...)[1]``) are reported. Ties are listed comma-separated in the
    order they were encountered.

    Args:
        data: raw input row ordered like the global ``feature_names``; the slot
            for ``choice`` may hold any placeholder. The caller's list is NOT
            modified.
        choice: feature to vary. A recommendation line is printed only for
            ``'menu'`` and ``'weather'``.

    Returns:
        None. Results are printed.
    """
    choice_list = df[choice].unique()
    slot = feature_names.index(choice)
    # Work on a copy so the caller's row is not overwritten with the last
    # candidate tried (the original mutated its argument in place).
    trial = list(data)
    best = []
    start_prob = -1
    for choice_str in choice_list:
        print(choice_str)
        trial[slot] = choice_str
        prob = pridict(inputEncoding(trial))
        if start_prob < prob[1]:
            # Strictly better: restart the list of winners.
            best = [choice_str]
            start_prob = prob[1]
        elif start_prob == prob[1]:
            # Exact tie with the current best: keep both.
            best.append(choice_str)
        print()
    menu = ", ".join(best)
    if choice == 'menu':
        print("{} 드세요.".format(menu))
    if choice == 'weather':
        # No weather gives any chance of the positive outcome.
        if start_prob == 0:
            print("먹지 마세요.")
        else:
            print("{}날 드세요.".format(menu))
In [63]:
# Ask for the best menu given the other two inputs. The empty third slot is a
# placeholder: choiceMenu substitutes each candidate menu value there.
data = [35, '흐림', '']
choiceMenu(data, 'menu')
In [73]:
# Ask for the best weather given the other two inputs. The empty second slot is
# a placeholder: choiceMenu substitutes each candidate weather value there.
data = [30, '', '햄버거']
choiceMenu(data, 'weather')
In [ ]: