notebook.community

Edit and run



In [111]:

    
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # very old scikit-learn that only ships the legacy module
    from sklearn.cross_validation import train_test_split


%matplotlib inline



In [27]:

    
# Load the iris data set bundled with scikit-learn and display the shape of
# its feature matrix (rows x columns).
iris = datasets.load_iris()
iris.data.shape









    Out[27]:





(150, 4)



In [20]:

    
# Fit a support-vector classifier on the whole iris data set, using the
# species names (looked up through the integer targets) as class labels.
clf = SVC()
clf.fit(iris.data, iris.target_names[iris.target])

# Only the last expression of a cell is displayed, so a bare
# `clf.predict(...)` in the middle of the cell is computed and thrown away.
# Keep the predictions in a variable and print them so they can be compared
# with the true labels shown as the cell's output.
predicted_species = clf.predict(iris.data[:3])
print("predicted:", predicted_species)

# True species names of the same first three samples (cell output).
iris.target_names[iris.target[:3]]









    Out[20]:





array(['setosa', 'setosa', 'setosa'], 
      dtype='<U10')



In [88]:

    
# Wrap the iris feature matrix in a DataFrame with named columns, then draw
# the grid of pairwise plots between those columns.
irisFrame = pd.DataFrame(data=iris.data, columns=iris.feature_names)
sns.pairplot(data=irisFrame)









    Out[88]:





<seaborn.axisgrid.PairGrid at 0xc4d7208>



In [33]:

    
# Data: feature matrix (x) and class targets (y) taken from the iris bunch.
x, y = iris.data, iris.target



In [42]:

    
# Classifier: k-nearest neighbours with k = 2.
# `fit` returns the estimator itself, so it is created and fitted to the data
# in one step -- this is where the magic happens.
knn = KNeighborsClassifier(n_neighbors=2).fit(x, y)

# Two hand-written samples to classify, one row per sample.
x_new = [
    [3, 5, 4, 2],
    [5, 4, 3, 2],
]
knn.predict(x_new)









    Out[42]:





array([1, 1])



In [56]:

    
# Split the data into training and test sets together with their targets:
# 40% of the samples go to the test set, and the seed is fixed so the split
# is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=4,
)



In [57]:

    
# Try K = 1 through K = 25 and record the accuracy on the held-out test set.
k_range = range(1, 26)
scores = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    # The split cell defines lowercase `x_train` / `x_test`; the uppercase
    # names used previously do not exist on a fresh kernel (NameError).
    knn.fit(x_train, y_train)
    y_pred = knn.predict(x_test)
    scores.append(accuracy_score(y_test, y_pred))

# Plot accuracy against the actual K values; plotting `scores` alone would
# put the list index (0..24) on the x-axis instead of K (1..25).
plt.xlabel("Value of K for KNN")
plt.ylabel("Testing accuracy")
plt.plot(list(k_range), scores)









    Out[57]:





[<matplotlib.lines.Line2D at 0xc017780>]



In [117]:

    
# Load the Advertising CSV; its first column is used as the row index.
sales = pd.read_csv(
    'http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv',
    index_col=0,
)

# One regression plot of Sales against each predictor column.
sns.pairplot(
    sales,
    x_vars=["TV", "Newspaper", "Radio"],
    y_vars="Sales",
    kind='reg',
)

# Predictor columns (x) and response column (y).
features = ['TV', 'Radio', 'Newspaper']
x, y = sales[features], sales["Sales"]

# Train/test split with a fixed seed (split size left at the library default).
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

# Regressor: `fit` returns the estimator, so create it and learn the
# coefficients from the training data in one step.
linreg = LinearRegression().fit(x_train, y_train)

# Pair each feature name with its fitted coefficient.
print(list(zip(features, linreg.coef_)))

# Prediction, i.e. f(x_test).
y_pred = linreg.predict(x_test)

# Root-mean-squared error: sqrt(mean((f(x_test) - y_test)**2)).
print(np.sqrt(mean_squared_error(y_test, y_pred)))









    



[('TV', 0.046564567874150253), ('Radio', 0.17915812245088844), ('Newspaper', 0.0034504647111804482)]
1.40465142303



In [ ]: