In [1]:
import numpy as np
from sklearn.model_selection import train_test_split

# Load the training CSV; row 0 is a header, so it is sliced away.
# Columns 0-3 are the features, column 4 is the regression target.
raw = np.genfromtxt('train_data.csv', delimiter=",")
X = raw[1:, :4]
y = raw[1:, 4]
In [2]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the data for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0)
In [3]:
from sklearn import linear_model
from sklearn.model_selection import cross_val_score

# Ordinary least-squares linear regression on the training split.
lin_reg = linear_model.LinearRegression()
lin_reg.fit(X_train, y_train)

# 6-fold cross-validation on the training data (each fold's score is R^2).
scores = cross_val_score(lin_reg, X_train, y_train, cv=6)
mean_score = scores.mean()
spread = scores.std() * 2
print("Mean of cross validation scores: %0.2f (+/- %0.2f)" % (mean_score, spread))
In [4]:
# LinearRegression.score returns R^2 (coefficient of determination), not a
# classification accuracy — it can even be negative — so report it as R^2
# rather than multiplying by 100 and labelling it a percentage.
print("R^2 on test set: {0:.2f}".format(lin_reg.score(X_test, y_test)))
In [5]:
# Unlabelled evaluation data: drop the header row, keep the 4 feature columns.
test_rows = np.genfromtxt('test_input.csv', delimiter=",")
var_test = test_rows[1:, :4]
In [6]:
# Predict on the evaluation rows and write one value per line.
predictions = lin_reg.predict(var_test)
np.savetxt("test_output.csv", predictions, delimiter=",")
In [7]:
import numpy as np

# ex2data1.txt has no header: columns 0-1 are features, column 2 is the label.
data = np.genfromtxt('ex2data1.txt', delimiter=",")
X, y = data[:, :2], data[:, 2]
In [8]:
from sklearn.model_selection import train_test_split

# Same 60/40 split and seed as the earlier regression experiment.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0)
In [9]:
from sklearn import linear_model
# Imported here as well so this cell runs on a fresh kernel even if the
# earlier linear-regression cell (which first imported it) was skipped.
from sklearn.model_selection import cross_val_score

# Binary logistic regression on the two ex2data1 features.
log_reg = linear_model.LogisticRegression()
log_reg.fit(X_train, y_train)

# 5-fold cross-validated accuracy on the training split.
scores = cross_val_score(log_reg, X_train, y_train, cv=5)
print("Mean of cross validation scores: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
In [10]:
# Classification accuracy of the fitted model on the held-out test split.
test_acc = log_reg.score(X_test, y_test)
print(f"Accuracy : {test_acc * 100:.2f} %")
In [11]:
import numpy as np

# ex2data2.txt: columns 0-1 are the raw features, column 2 is the 0/1 label.
data = np.genfromtxt('ex2data2.txt', delimiter=",")
X, y = data[:, :2], data[:, 2]
In [12]:
def mapFeature(X1, X2, degree=6):
    """Map two feature columns to all polynomial terms up to `degree`.

    Returns an (n, (degree+1)(degree+2)/2) array whose columns are, in order:
    1, X1, X2, X1^2, X1*X2, X2^2, ..., X1*X2^(degree-1), X2^degree
    (for degree=6 that is 28 columns, matching the original hard-coded size).

    Parameters
    ----------
    X1, X2 : array_like or scalar
        Feature values; scalars are treated as length-1 vectors.
        (The original only special-cased np.float64 scalars and crashed on
        plain Python floats; np.atleast_1d handles any scalar.)
    degree : int, default 6
        Highest total polynomial degree to generate.
    """
    # Flatten to 1-D; the original called .reshape((l, 1)) but discarded the
    # result (reshape is not in-place), so the arrays stayed 1-D anyway.
    X1 = np.atleast_1d(np.asarray(X1, dtype=float)).ravel()
    X2 = np.atleast_1d(np.asarray(X2, dtype=float)).ravel()
    n = len(X1)
    # Number of monomials of total degree <= degree in two variables.
    n_terms = (degree + 1) * (degree + 2) // 2
    out = np.ones((n, n_terms))  # column 0 stays the bias/intercept term
    k = 1
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out[:, k] = X1 ** (i - j) * X2 ** j
            k += 1
    return out
In [13]:
# Expand the two raw columns into the full polynomial feature basis.
x1, x2 = X[:, 0], X[:, 1]
X = mapFeature(x1, x2)
In [14]:
from sklearn.model_selection import train_test_split

# 60/40 split of the polynomial features, same seed as before.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0)
In [15]:
from sklearn import linear_model
# Imported here as well so this cell runs on a fresh kernel even if the
# earlier cell that first imported cross_val_score was skipped.
from sklearn.model_selection import cross_val_score

# L2-regularised logistic regression on the polynomial features.
# C is the INVERSE regularisation strength, so C=10 regularises weakly.
log_reg = linear_model.LogisticRegression(C = 10)
log_reg.fit(X_train, y_train)

# 5-fold cross-validated accuracy on the training split.
scores = cross_val_score(log_reg, X_train, y_train, cv=5)
print("Mean of cross validation scores: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
In [16]:
# Test-set accuracy of the regularised polynomial model.
test_acc = log_reg.score(X_test, y_test)
print(f"Accuracy : {test_acc * 100:.2f} %")
In [17]:
import numpy as np
import scipy.io

# Load the MATLAB file; loadmat already returns ndarrays, so the extra
# np.array(...) wrapping in the original was redundant.
data = scipy.io.loadmat("ex3data1.mat")
X = data['X']
# MATLAB stores y as an (n, 1) column vector; flatten to (n,) so sklearn
# estimators don't raise a DataConversionWarning when fitting.
# (ravel is a no-op if y is somehow already 1-D.)
y = data['y'].ravel()
In [18]:
from sklearn.model_selection import train_test_split

# 60/40 split; note this cell uses seed 40, unlike the seed-0 splits above.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=40)
In [21]:
from sklearn.linear_model import LogisticRegression

# One-vs-rest multi-class logistic regression.
# NOTE(review): tol=4.5 is far looser than sklearn's default (1e-4), so the
# lbfgs solver stops after very few iterations — presumably a deliberate
# speed trade-off; confirm this is intentional.
multi_clf = LogisticRegression(
    solver='lbfgs',
    penalty='l2',
    multi_class='ovr',
    C=50,
    tol=4.5,
)
multi_clf.fit(X_train, y_train)
Out[21]:
In [22]:
# Test-set accuracy of the one-vs-rest classifier.
test_acc = multi_clf.score(X_test, y_test)
print(f"Accuracy : {test_acc * 100:.2f} %")
In [23]:
import numpy as np
from sklearn import datasets

# Built-in iris dataset: feature matrix and integer class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target
In [24]:
from sklearn.model_selection import train_test_split

# 60/40 split of the iris data, seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0)
In [25]:
from sklearn.neighbors import KNeighborsClassifier
# Imported here as well so this cell runs on a fresh kernel even if the
# earlier cell that first imported cross_val_score was skipped.
from sklearn.model_selection import cross_val_score

# 3-nearest-neighbour classifier on the iris training split.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# 5-fold cross-validated accuracy on the training data.
scores = cross_val_score(knn, X_train, y_train, cv=5)
print("Mean of cross validation scores: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
In [26]:
# Test-set accuracy of the k-NN classifier.
test_acc = knn.score(X_test, y_test)
print(f"Accuracy : {test_acc * 100:.2f} %")