In [1]:
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
In [2]:
# from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import seaborn as sns
import sklearn as sk
import numpy as np
import matplotlib.pyplot as plt
sns.set()
# plt.rc('font',family='AppleGothic')
In [3]:
def print_full(x):
    """Print every row of ``x`` without pandas' row truncation.

    The row limit is lifted only while printing. The caller's previous
    ``display.max_rows`` setting is restored afterwards, even if printing
    raises.

    Parameters
    ----------
    x : object
        The object to print; typically a DataFrame or Series.
    """
    # option_context restores the prior value on exit. reset_option would
    # instead fall back to the pandas default and silently discard any custom
    # limit the user had configured earlier in the notebook.
    with pd.option_context('display.max_rows', None):
        print(x)
In [4]:
# Load the preprocessed training set.
# DataFrame.from_csv was deprecated and later removed from pandas; read_csv
# with index_col=0 and parse_dates=True reproduces its defaults (first column
# becomes the index, and the index is parsed as dates where possible).
data = pd.read_csv(
    "name_and_train_simple_preprocess.csv",
    index_col=0,
    parse_dates=True,
)
data.head()
Out[4]:
In [5]:
# Feature matrix: the six indicator columns selected from `data`.
feature_columns = [
    'Cat',
    'Dog',
    'Intact Female',
    'Intact Male',
    'Neutered Male',
    'Spayed Female',
]
X = data[feature_columns]
print(X.head())
In [6]:
# Target vector: the outcome label column from `data`.
target_column = 'OutcomeType_label'
y = data[target_column]
y.head()
Out[6]:
In [8]:
# Random forest using entropy as the split criterion. class_weight="balanced"
# weights each class inversely to its frequency in y.
clf = RandomForestClassifier(
    n_estimators=100,
    criterion='entropy',
    max_depth=None,
    min_weight_fraction_leaf=0.0,
    # 'sqrt' is what 'auto' meant for classifiers. 'auto' was deprecated and
    # then removed from scikit-learn, so the value is spelled out explicitly.
    max_features='sqrt',
    max_leaf_nodes=None,
    bootstrap=True,
    oob_score=False,
    n_jobs=1,
    # Fixed seed so that Restart & Run All reproduces the same forest.
    random_state=42,
    verbose=0,
    warm_start=False,
    class_weight="balanced",
)
# fit() returns the estimator itself, so both names refer to the same model.
model_rand_forest = clf.fit(X, y)

# NOTE: predictions and accuracy below are computed on the same rows the model
# was fitted on, so `score` is training accuracy and overstates how well the
# model generalises. Use a held-out split for an honest estimate.
test = model_rand_forest.predict(X)
score = model_rand_forest.score(X, y)
print("score : ", score)

# To print the whole prediction array untruncated, first run
# np.set_printoptions(threshold=sys.maxsize) (needs `import sys`); NaN is not
# a valid threshold.
print(test)
In [ ]:
# First EDA (exploratory data analysis) with vote_1 (target)
# sns.pairplot(prep_X_Y, diag_kind="kde", kind="reg", size=5)
# plt.show()
In [ ]:
# Second EDA(like excel)
In [ ]:
# Result
# AnimalID,Adoption,Died,Euthanasia,Return_to_owner,Transfer
# A715022,1,0,0,0,0
# A677429,0.5,0.3,0.2,0,0