In [1]:
import pandas
import numpy as np
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler

zoo = r'C:\Users\priyu\Machine-Learning\zoo-animal-classification\zoo.csv'
#zoo_class = r'C:\Users\priyu\Machine-Learning\zoo-animal-classification\class.csv'
training_set = pandas.read_csv(zoo, index_col=False)
#zoo_class_set = pandas.read_csv(zoo_class, index_col = False)
zoo_data_df = training_set[['hair','feathers','eggs','milk','airborne','aquatic','predator','toothed','backbone','breathes','venomous','fins','legs','tail','domestic','catsize']]
zoo_target_df = training_set[['class_type']]
zoo_target = zoo_target_df.values
zoo_data = zoo_data_df.values



In [9]:
validation_size = 0.20
seed = 7
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(zoo_data, zoo_target, test_size=validation_size, random_state=seed)

In [10]:
# flatten the (n, 1) target into a 1-D integer array so the estimators and scorers accept it
y = Y_train.ravel()
Y_train = np.array(y).astype(int)
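StandardScaler is imported above but never applied. The zoo features are almost all 0/1 flags (only legs is a count), so scaling is optional, but it can help distance-based models such as KNN or SVC; a minimal sketch, fitting the scaler on the training split only:

In [ ]:
# sketch: standardize features using statistics from the training split only
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_validation_scaled = scaler.transform(X_validation)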

In [41]:
# Test options and evaluation metric
num_folds = 10
num_instances = len(X_train)
seed = 7
scoring = 'accuracy'

In [60]:
model = LogisticRegression(class_weight='balanced')
results = []
names = []
kfold = model_selection.KFold(n_splits=num_folds)  # unshuffled folds, matching the behaviour of the old cross_validation.KFold(n=..., n_folds=...)
cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
results.append(cv_results)
#names.append(name)
msg = "%s: %f (%f)" % ('Logistic Regression', cv_results.mean(), cv_results.std())
print(msg)
print(cv_results)


Logistic Regression: 0.950000 (0.061237)
[ 1.     1.     1.     0.875  0.875  0.875  1.     1.     0.875  1.   ]
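The other classifiers imported at the top (LDA, KNN, decision tree, naive Bayes, SVC) are never evaluated above. A sketch of the usual spot-check loop, reusing the results and names lists and the same 10-fold setup (the model choices and their default settings are illustrative):

In [ ]:
# sketch: spot-check the other imported classifiers with the same folds and metric
models = [('LR', LogisticRegression(class_weight='balanced')),
          ('LDA', LinearDiscriminantAnalysis()),
          ('KNN', KNeighborsClassifier()),
          ('CART', DecisionTreeClassifier()),
          ('NB', GaussianNB()),
          ('SVM', SVC())]
for name, clf in models:
    scores = model_selection.cross_val_score(clf, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(scores)
    names.append(name)
    print("%s: %f (%f)" % (name, scores.mean(), scores.std()))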

In [13]:
# fit - fit the model according to the given training data
# m.fit(iris.data,iris.target)

In [14]:
# decision_function - predicts confidence scores (the signed distance of each sample to the separating hyperplane)
# m.decision_function(iris.data)

In [50]:
# get_params - returns the estimator's parameters and their current values (defaults shown below)
LogisticRegression().get_params()


Out[50]:
{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'max_iter': 100,
 'multi_class': 'ovr',
 'n_jobs': 1,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'liblinear',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}
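get_params has a counterpart, set_params, which updates hyperparameters on an existing estimator; a small sketch (the values shown are arbitrary examples):

In [ ]:
# sketch: change hyperparameters on an existing estimator via set_params
m = LogisticRegression()
m.set_params(C=0.5, class_weight='balanced')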

In [16]:
# fit_transform - fits to the data, then transforms it (note: no longer available on LogisticRegression in recent scikit-learn versions)
# m.fit_transform(iris.data,iris.target)

In [17]:
# predict - returns the predicted class label for each sample (for iris: 0-Setosa, 1-Versicolour, 2-Virginica)
# m.predict(iris.data)

In [18]:
# predict_proba - returns the probability estimate for each class
# m.predict_proba(iris.data)

In [19]:
# predict_log_proba - returns the log of the probability estimate for each class
# m.predict_log_proba(iris.data)

In [20]:
# score - returns the mean accuracy on the given data and labels
# m.score(iris.data,iris.target)

In [21]:
# sparsify - converts the coefficient matrix to sparse (scipy.sparse) format
# m.sparsify()
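The metrics imported at the top (accuracy_score, confusion_matrix, classification_report) are never used above. A sketch that ties the commented-out methods together by fitting on the training split and evaluating on the 20% hold-out created earlier (variable names are illustrative):

In [ ]:
# sketch: fit on the training split and evaluate on the validation hold-out
Y_val = Y_validation.ravel().astype(int)        # same flattening applied to Y_train earlier
model = LogisticRegression(class_weight='balanced')
model.fit(X_train, Y_train)
predictions = model.predict(X_validation)
print(accuracy_score(Y_val, predictions))
print(confusion_matrix(Y_val, predictions))
print(classification_report(Y_val, predictions))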