In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

In [2]:
# from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import seaborn as sns
import sklearn as sk
import numpy as np
import matplotlib.pyplot as plt

sns.set()

# plt.rc('font',family='AppleGothic')

In [3]:
# Function to print a DataFrame in full (all rows)
def print_full(x):
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
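
# Example usage (hypothetical; `data` is loaded in the next cell): print_full(data)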

In [4]:
data = pd.read_csv("name_and_train_simple_preprocess.csv", index_col=0)
data.head()
# data.columns


Out[4]:
   Name  Cat  Dog  Intact Female  Intact Male  Neutered Male  Spayed Female  OutcomeType_label
0     1    0    1              0            0              1              0                  3
1     1    1    0              0            0              0              1                  2
2     1    0    1              0            0              1              0                  0
3     0    1    0              0            1              0              0                  4
4     0    0    1              0            0              1              0                  4
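
In [ ]:
# LabelEncoder is imported above but never used in this notebook; presumably OutcomeType_label
# was produced during preprocessing roughly as in this sketch (assumed, not the original code).
le = LabelEncoder()
sample_outcomes = pd.Series(['Return_to_owner', 'Euthanasia', 'Adoption', 'Transfer', 'Died'])
print(le.fit_transform(sample_outcomes))                  # integer codes, e.g. [3 2 0 4 1]
print(dict(zip(le.classes_, le.transform(le.classes_))))  # class name -> integer code mapping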

In [5]:
X = data[['Cat', 'Dog', 'Intact Female',
         'Intact Male', 'Neutered Male','Spayed Female']]

print(X.head())


   Cat  Dog  Intact Female  Intact Male  Neutered Male  Spayed Female
0    0    1              0            0              1              0
1    1    0              0            0              0              1
2    0    1              0            0              1              0
3    1    0              0            1              0              0
4    0    1              0            0              1              0

In [6]:
y = data['OutcomeType_label']
y.head()


Out[6]:
0    3
1    2
2    0
3    4
4    4
Name: OutcomeType_label, dtype: int64
In [7]:
model_logistic = LogisticRegression(C=1000, solver='lbfgs',
                                    multi_class='multinomial').fit(X, y)
y_pred = model_logistic.predict(X)

# show full array
np.set_printoptions(threshold=np.inf)
print(y_pred)
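
In [ ]:
# Quick sanity check on the logistic regression fit (added sketch, not part of the original
# run): training-set accuracy and the distribution of predicted classes.
print("train accuracy:", model_logistic.score(X, y))
print(pd.Series(y_pred).value_counts())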

In [8]:
clf = RandomForestClassifier(n_estimators=100, criterion='entropy', max_depth=None,
                             min_weight_fraction_leaf=0.0, max_features='sqrt', max_leaf_nodes=None,
                             bootstrap=True, oob_score=False,
                             n_jobs=1, random_state=None, verbose=0, warm_start=False, class_weight="balanced")

model_rand_forest = clf.fit(X, y)
test = model_rand_forest.predict(X)

score = model_rand_forest.score(X, y)
print("score : ", score)

# show full array
# np.set_printoptions(threshold=np.inf)
print(test)


score :  0.304313666804
[3 0 3 ..., 3 1 1]
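
In [ ]:
# The scores above are computed on the training data itself; a held-out estimate is more
# informative. A minimal sketch using 5-fold cross-validation (assumed, not part of the
# original run).
from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(clf, X, y, cv=5)
print("5-fold CV accuracy: %.3f +/- %.3f" % (cv_scores.mean(), cv_scores.std()))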

In [ ]:
# First EDA (exploratory data analysis) with vote_1 (target)

# sns.pairplot(prep_X_Y, diag_kind="kde", kind="reg", size=5)
# plt.show()
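
In [ ]:
# prep_X_Y referenced above is not defined in this notebook; presumably it is the feature
# columns joined with the target, e.g. as below (assumed). The commented pairplot could then
# be run against it.
prep_X_Y = pd.concat([X, y], axis=1)
print(prep_X_Y.head())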

In [ ]:
# Second EDA (Excel-like view)

In [ ]:
# Result

# AnimalID,Adoption,Died,Euthanasia,Return_to_owner,Transfer
# A715022,1,0,0,0,0
# A677429,0.5,0.3,0.2,0,0
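
In [ ]:
# Sketch of assembling a submission in the format above from predicted class probabilities.
# The column order assumes an alphabetical LabelEncoder mapping (0=Adoption ... 4=Transfer);
# `AnimalID` would come from the preprocessed test set, which is not loaded in this notebook.
proba = model_rand_forest.predict_proba(X)
outcome_names = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
submission = pd.DataFrame(proba, columns=outcome_names)
# submission.insert(0, 'AnimalID', animal_ids)   # animal_ids: hypothetical, from the test data
# submission.to_csv('submission.csv', index=False)
print(submission.head())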