In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

In [2]:
# from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import seaborn as sns
import sklearn as sk
import numpy as np
import matplotlib.pyplot as plt

sns.set()

# plt.rc('font',family='AppleGothic')

In [3]:
# Function to print a DataFrame in full (all rows)
def print_full(x):
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
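
# Example usage (hypothetical; `data` is loaded in the next cell): print_full(data)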

In [4]:
data = pd.read_csv("name_and_train_simple_preprocess.csv", index_col=0)
data.head()
# data.columns


Out[4]:
   Name  Cat  Dog  Intact Female  Intact Male  Neutered Male  Spayed Female  OutcomeType_label
0     1    0    1              0            0              1              0                  3
1     1    1    0              0            0              0              1                  2
2     1    0    1              0            0              1              0                  0
3     0    1    0              0            1              0              0                  4
4     0    0    1              0            0              1              0                  4
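
In [ ]:
# LabelEncoder is imported above but never used in this notebook; presumably OutcomeType_label
# was produced during preprocessing roughly as in this sketch (assumed, not the original code).
le = LabelEncoder()
sample_outcomes = pd.Series(['Return_to_owner', 'Euthanasia', 'Adoption', 'Transfer', 'Died'])
print(le.fit_transform(sample_outcomes))                  # integer codes, e.g. [3 2 0 4 1]
print(dict(zip(le.classes_, le.transform(le.classes_))))  # class name -> integer code mapping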

In [5]:
X = data[['Cat', 'Dog', 'Intact Female',
         'Intact Male', 'Neutered Male','Spayed Female']]

print(X.head())


   Cat  Dog  Intact Female  Intact Male  Neutered Male  Spayed Female
0    0    1              0            0              1              0
1    1    0              0            0              0              1
2    0    1              0            0              1              0
3    1    0              0            1              0              0
4    0    1              0            0              1              0

In [6]:
y = data['OutcomeType_label']
y.head()


Out[6]:
0    3
1    2
2    0
3    4
4    4
Name: OutcomeType_label, dtype: int64
In [7]:
model_logistic = LogisticRegression(C=1000, solver='lbfgs',
                                    multi_class='multinomial').fit(X, y)
y_pred = model_logistic.predict(X)

# show full array
np.set_printoptions(threshold=np.inf)
print(y_pred)
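
In [ ]:
# Quick sanity check on the logistic regression fit (added sketch, not part of the original
# run): training-set accuracy and the distribution of predicted classes.
print("train accuracy:", model_logistic.score(X, y))
print(pd.Series(y_pred).value_counts())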

In [8]:
clf = RandomForestClassifier(n_estimators=100, criterion='entropy', max_depth=None,
                             min_weight_fraction_leaf=0.0, max_features='sqrt', max_leaf_nodes=None,
                             bootstrap=True, oob_score=False,
                             n_jobs=1, random_state=None, verbose=0, warm_start=False, class_weight="balanced")

model_rand_forest = clf.fit(X, y)
test = model_rand_forest.predict(X)

score = model_rand_forest.score(X, y)
print("score : ", score)

# show full array
# np.set_printoptions(threshold=np.inf)
print(test)


score :  0.304313666804
[3 0 3 ..., 3 1 1]
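
In [ ]:
# The scores above are computed on the training data itself; a held-out estimate is more
# informative. A minimal sketch using 5-fold cross-validation (assumed, not part of the
# original run).
from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(clf, X, y, cv=5)
print("5-fold CV accuracy: %.3f +/- %.3f" % (cv_scores.mean(), cv_scores.std()))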

In [ ]:
# First EDA (exploratory data analysis) with vote_1 (target)

# sns.pairplot(prep_X_Y, diag_kind="kde", kind="reg", size=5)
# plt.show()
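
In [ ]:
# prep_X_Y referenced above is not defined in this notebook; presumably it is the feature
# columns joined with the target, e.g. as below (assumed). The commented pairplot could then
# be run against it.
prep_X_Y = pd.concat([X, y], axis=1)
print(prep_X_Y.head())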

In [ ]:
# Second EDA (Excel-like view)

In [ ]:
# Result

# AnimalID,Adoption,Died,Euthanasia,Return_to_owner,Transfer
# A715022,1,0,0,0,0
# A677429,0.5,0.3,0.2,0,0
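
In [ ]:
# Sketch of assembling a submission in the format above from predicted class probabilities.
# The column order assumes an alphabetical LabelEncoder mapping (0=Adoption ... 4=Transfer);
# `AnimalID` would come from the preprocessed test set, which is not loaded in this notebook.
proba = model_rand_forest.predict_proba(X)
outcome_names = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
submission = pd.DataFrame(proba, columns=outcome_names)
# submission.insert(0, 'AnimalID', animal_ids)   # animal_ids: hypothetical, from the test data
# submission.to_csv('submission.csv', index=False)
print(submission.head())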