In [62]:
# preprocessing: numbers-->mathematical operations
# Define neighbourhood, classification
In [7]:
from sklearn import datasets
In [8]:
iris_data = datasets.load_iris()  # load the iris example dataset bundled with scikit-learn
In [9]:
# Define target value, feature value
In [10]:
iris_data.target  # integer class label for each sample (one entry per row of .data)
Out[10]:
In [11]:
iris_data.data[:5]  # peek at the first five rows of the feature matrix
Out[11]:
In [12]:
iris_data.feature_names  # human-readable names of the feature columns
Out[12]:
In [13]:
iris_data.target_names  # human-readable names of the classes encoded in .target
Out[13]:
In [14]:
# Why do we need a train/test split?
In [15]:
from sklearn.model_selection import train_test_split
In [18]:
# Hold out 30% of the data as the final test set.
# random_state pins the shuffle so the split is reproducible across runs
# (the lesson plan at the "Explain train-test ratio, random_seed" step
# discusses the seed, but the original call never set one, so every
# re-run produced a different split and different accuracies).
X_train, X_test, y_train, y_test = train_test_split(iris_data.data, iris_data.target, test_size=0.3, random_state=42)
In [19]:
# Carve a validation set (20% of the training portion) out of the training
# data for model selection; seed it for reproducibility like the first split.
# NOTE(review): this cell overwrites X_train/y_train in place, so re-running
# it without re-running the previous split shrinks the training set each time
# — run top-to-bottom only (Restart Kernel -> Run All).
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
In [20]:
# Explain train-test ratio, random_seed
In [21]:
# Explain basic working of knn, rnn, weighted/non-weighted, brute force, order of execution
In [22]:
from sklearn.neighbors import KNeighborsClassifier
In [23]:
knn = KNeighborsClassifier(n_neighbors=5)  # k-NN classifier: predict by majority vote among the 5 nearest training samples
In [24]:
knn.fit(X_train, y_train)  # fit on the training split only; validation/test stay unseen
Out[24]:
In [25]:
# Perform model evaluation on validate/test set, explain confusion matrix, accuracy
# Explain prediction step
# Confusion matrix
# TP, TN, FP (type I error), FN (type II error), disease prediction
# accuracy = (TP + TN) / total
# error rate = 1 - accuracy
# recall = TP / (TP + FN)  (of the actually-positive cases, how many did we predict positive)
# precision = TP / (TP + FP)  (of the cases predicted positive, how many are actually positive)
In [26]:
from sklearn import metrics
In [27]:
y_val_pred = knn.predict(X_val)  # predictions on the held-out validation set
In [28]:
print("Accuracy:",metrics.accuracy_score(y_val, y_val_pred))
In [29]:
# Final sanity check on the untouched test set.
y_test_pred = knn.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_test_pred))
In [30]:
# Define learned parameter, hyperparameter, curse of dim, over/under fitting
# Explain k-fold-cross-validation, GridSearch
In [31]:
from sklearn.model_selection import GridSearchCV
In [32]:
# Hyperparameter grid for GridSearchCV:
#   n_neighbors — the k in k-NN
#   p — Minkowski distance power (p=1 Manhattan, p=2 Euclidean)
# NOTE(review): p=2 (Euclidean, the usual default) is absent from [1, 3] —
# confirm that skipping it is intended.
parameters = {'n_neighbors':[1,2,3,4], 'p':[1,3]}
In [33]:
# Explain n_neighbour, and P for knn
In [63]:
model = GridSearchCV(knn, param_grid=parameters)  # cross-validated exhaustive search over the k / p grid
In [64]:
model.fit(X_train,y_train)  # fits one model per parameter combination per CV fold
Out[64]:
In [65]:
model.best_estimator_  # the estimator refit with the best-scoring parameter combination
Out[65]:
In [66]:
model.best_score_  # best mean cross-validated score found during the search
Out[66]:
In [67]:
knn_best=model.best_estimator_
In [68]:
knn_best.score(X_val,y_val)  # accuracy on the validation split
Out[68]:
In [69]:
knn_best.score(X_test,y_test)  # accuracy on the untouched test split
Out[69]:
In [70]:
# Same test accuracy computed explicitly via predict + accuracy_score.
y_test_pred = knn_best.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_test_pred))
In [71]:
# Try a different classifier
In [72]:
# Baseline comparison: how does a decision tree do on the same split?
# ("tree_clf" instead of "df" — in a pandas-heavy context "df" reads
# as DataFrame and misleads the reader.)
from sklearn.tree import DecisionTreeClassifier
tree_clf = DecisionTreeClassifier()
tree_clf.fit(X_train, y_train)
y_test_pred = tree_clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_test_pred))
In [73]:
# Step 8: Save, load, serve model
# Mention Flask API, pickle, model persistence
In [74]:
import pickle
# Serialize the fitted model to a bytes object, restore it, and verify the
# round-tripped model still predicts.
# NOTE(review): pickle.loads can execute arbitrary code — only ever unpickle
# model files you produced yourself, never untrusted input.
_save = pickle.dumps(knn_best)
_load = pickle.loads(_save)
_load.predict(X_test)
"""
similarly there are numpy functions to load/save arrays
from joblib import dump, load
dump(model, 'path/model_name.joblib')
_loaded_model = load('path/model_filename.joblib')
"""
Out[74]:
In [75]:
# Not the actual syntax, just the intuition
"""
class_model:
def __init__(self):
_loaded_model = pickle.loads("path/model_name")
"""
"""
import flask and its components
import class_model
app.model=class_model() # load it beforehand
@path=[api/v1/predict]
def predict(user_input):
result=app.model.predict(user_input)
return(jsonify(result))
if __name__ == "__main__":
app.run()
"""
Out[75]: