In [1]:
from IPython.display import Image
In [2]:
Image("images/ml-model.png", width=500)
Out[2]:
In [3]:
Image("images/ml-1.png", width=800)
Out[3]:
In [4]:
from sklearn.datasets import make_blobs
In [5]:
# Fix the seed so the generated blobs (and every result derived from them)
# are identical on each Restart & Run All.
X, y = make_blobs(random_state=42)
k-means (метод k-средних) — наиболее популярный метод кластеризации. Он стремится минимизировать суммарное квадратичное отклонение точек кластеров от центров этих кластеров.
In [2]:
from sklearn.cluster import KMeans
In [3]:
# Centroid initialisation is random; pin the seed so that the cluster
# assignments produced below are reproducible between runs.
kmeans = KMeans(n_clusters=3, random_state=42)
In [8]:
kmeans.fit(X)
Out[8]:
In [9]:
kmeans.predict(X)
Out[9]:
In [10]:
# Keep the cluster index the fitted model assigns to each sample of X,
# so it can be compared against the reference labels later.
cluster_labels = kmeans.predict(X)
In [11]:
from sklearn.metrics import accuracy_score
accuracy_score — показывает долю точных совпадений истинных и предсказанных значений
y_pred = [0, 2, 1, 3]
y_true = [0, 1, 2, 3]
accuracy_score(y_true, y_pred)
0.5
http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
In [12]:
# Compare the labels in y with the cluster indices element by element.
# NOTE(review): cluster indices produced by KMeans are presumably arbitrary
# (cluster 0 need not correspond to label 0), so this value can change with
# the numbering of the clusters — confirm whether a permutation-invariant
# metric (e.g. sklearn.metrics.adjusted_rand_score) is intended here.
accuracy_score(y, cluster_labels)
Out[12]:
In [16]:
Image("images/accuracy.png", width=300)
Out[16]:
In [18]:
from sklearn.datasets import load_iris
In [19]:
# Load the iris dataset object and unpack its feature matrix and target
# vector into X and y (replacing the earlier make_blobs data).
iris = load_iris()
X, y = iris.data, iris.target
In [20]:
#from sklearn import svm
In [23]:
# Your code here
In [24]:
# Your code here
In [25]:
# Your code here
In [26]:
Image("images/training-data.png", width=500)
Out[26]:
In [28]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
In [30]:
# Hold out 40% of the iris samples for testing. The fixed seed keeps the
# split, and therefore the train/test scores below, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=42
)
In [31]:
from sklearn.neighbors import KNeighborsClassifier
In [32]:
model = KNeighborsClassifier()
In [33]:
model.fit(X_train, y_train)
Out[33]:
In [35]:
model.score(X_train, y_train)
Out[35]:
In [34]:
model.score(X_test, y_test)
Out[34]:
In [29]:
Image("images/crossvalidation.png", width=500)
Out[29]:
In [36]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; cross_val_score now lives in sklearn.model_selection.
from sklearn.model_selection import cross_val_score
In [38]:
# Evaluate the classifier with cv=5 cross-validation on the full dataset.
# X and y here are the iris features/labels assigned after load_iris(),
# not the make_blobs data that used the same names earlier.
cross_val_score(model, X, y, cv=5)
Out[38]: