In [1]:
from IPython.core.display import HTML
import os
def css_styling():
    """Build an HTML ``<style>`` element from ``files/custom.css``.

    The stylesheet path is resolved relative to the current working
    directory (``os.getcwd()``), not to any IPython profile directory.

    Returns:
        An ``HTML`` display object whose content is the stylesheet text
        wrapped in ``<style>`` tags.

    Raises:
        IOError/OSError: if ``files/custom.css`` cannot be opened.
    """
    base = os.getcwd()
    css_path = os.path.join(base, 'files/custom.css')
    # Use a context manager so the file handle is closed as soon as the
    # stylesheet text has been read.
    with open(css_path, 'r') as css_file:
        styles = "<style>\n%s\n</style>" % css_file.read()
    return HTML(styles)


# Last expression of the cell: the returned HTML object is rendered as output.
css_styling()
Out[1]:
En Machine Learning, las computadoras aplican técnicas de aprendizaje estadístico para reconocer patrones en los datos de forma automática.
Estas técnicas se pueden utilizar para predecir, clasificar, ajustar modelos, descubrir patrones y reducir la dimensionalidad.
Para ello utilizaremos la librería Scikit-learn:
In [1]:
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
import sklearn.datasets as datasets
%matplotlib inline
In [2]:
# Generate a synthetic dataset of points drawn around 2 centres.
# A fixed random_state makes the data (and every plot and score derived
# from it) identical on each Restart & Run All.
X, Y = datasets.make_blobs(centers=2, random_state=0)

# X holds the sample coordinates; Y holds the index of the blob each sample
# was drawn from.
print("Informacion sobre X:")
print(X.shape)
print(X)
print("Informacion sobre Y:")
print(Y.shape)
print(Y)
In [3]:
# Plot the first feature column against the second, coloured by the blob
# label Y returned by make_blobs. The trailing ';' hides the repr output.
plt.scatter(x=X[:, 0], y=X[:, 1], c=Y);
In [6]:
from sklearn.cluster import KMeans

# Fit k-means with 4 clusters on X, then keep the cluster index that the
# fitted model assigned to each sample.
kmeans = KMeans(n_clusters=4)
kmeans.fit(X)
Y_pred = kmeans.labels_
print(Y_pred)
Más bonito, en una gráfica:
In [7]:
# Same scatter of the first two feature columns, now coloured by the cluster
# index k-means assigned to each sample. The trailing ';' hides the repr output.
plt.scatter(x=X[:, 0], y=X[:, 1], c=Y_pred);
Podemos cuantificar el error para ver qué tal funciona el agrupamiento:
In [8]:
# KMeans.score returns the *opposite* (negative) of the k-means objective and
# ignores the y argument, so negate it to report a non-negative error
# (sum of squared distances of the samples to their closest centre).
error = -kmeans.score(X)
print("El error es : %f "%error)
y visualizar los centros:
In [9]:
# Samples, semi-transparent, coloured by their assigned cluster.
plt.scatter(X[:,0], X[:,1], c=Y_pred, alpha=0.4)

# One row per cluster centre; row i is the centre of cluster label i.
mu = kmeans.cluster_centers_

# Colour each centre by its own row index so it matches the colour of the
# samples assigned to it, even if some label is absent from Y_pred.
plt.scatter(mu[:,0], mu[:,1], s=100, c=np.arange(mu.shape[0]))

# print() call form works on both Python 2 and Python 3 and matches the
# other cells of this notebook.
print(mu)
In [14]:
# Numbers of clusters to try.
# NOTE(review): KMeans needs at least as many samples as clusters; this
# assumes X has >= 100 rows -- confirm against the data-generation cell.
ks = [2, 5, 8, 10, 20, 40, 60, 80, 100]

# error[i] is the k-means objective reached with ks[i] clusters.
error = []
for k in ks:
    kmeans = KMeans(k)
    kmeans.fit(X)
    # score() returns the negative of the objective and ignores y; negate it
    # so the stored value is a non-negative error.
    error.append(-kmeans.score(X))
In [15]:
# Error for each tested number of clusters, drawn as a solid line with
# circular markers.
plt.plot(ks, error, linestyle='-', marker='o')
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
from sklearn.cluster import KMeans
from sklearn.utils import shuffle
from sklearn.datasets import load_digits
# Load scikit-learn's bundled handwritten-digits dataset and unpack the
# flattened image vectors (X) and their digit labels (Y).
digits = load_digits()
X, Y = digits.data, digits.target

# Disabled alternative kept from the original notebook.
# NOTE(review): the first line is labelled "MNIST data", but load_digits is
# the small 8x8 digits set, not MNIST -- confirm before re-enabling.
#X_digits, _,_, Y_digits = load_digits() # MNIST data
#X_digits, Y_digits = shuffle(X_digits,Y_digits) # shuffle the data randomly
#X_digits = X_digits[-5000:] # take only the last instances, to shorten runtime of KMeans
In [ ]:
Vamos a ver qué acabamos de cargar:
In [ ]:
plt.rc("image", cmap="binary") # use black/white palette for plotting

# Each row of X is a flattened square image. Derive the side length from the
# number of features instead of hard-coding it: load_digits rows have 64
# features (8x8), so reshape(28, 28) would raise ValueError.
side = int(round(X.shape[1] ** 0.5))

# Show the first 10 samples on a 2x5 grid with the axis ticks hidden.
# range() works on both Python 2 and Python 3 (xrange is Python 2 only).
for i in range(10):
    plt.subplot(2, 5, i + 1)
    plt.imshow(X[i].reshape(side, side))
    plt.xticks(())
    plt.yticks(())
plt.tight_layout()
corremos k-means
In [ ]:
# Cluster the digit vectors into 20 groups and keep the fitted centres
# (one row per cluster, same number of features as X).
kmeans = KMeans(n_clusters=20)
kmeans.fit(X)
mu_digits = kmeans.cluster_centers_
y visualizamos
In [ ]:
plt.figure(figsize=(16,6))

# Two rows of panels. Floor division keeps the column count an integer on
# Python 3 as well ('/' would give a float, which range() and subplot() reject).
n_cols = mu_digits.shape[0] // 2

# Each centre is a flattened square image; derive the side length from the
# feature count rather than hard-coding 32, which does not match the
# 64-feature centres fitted on the load_digits data.
side = int(round(mu_digits.shape[1] ** 0.5))

# Loop over the centres that fit the 2 x n_cols grid (an odd last centre is
# skipped, as in the original loop bound).
for i in range(2 * n_cols):
    plt.subplot(2, n_cols, i + 1)
    plt.imshow(mu_digits[i].reshape(side, side))
    plt.xticks(())
    plt.yticks(())
plt.tight_layout()
In [ ]:
In [ ]: