In [2]:
# loading necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score

# train_test_split moved to sklearn.model_selection in scikit-learn 0.18 and the
# old sklearn.cross_validation module was removed in 0.20; prefer the new
# location and fall back only on very old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
In [6]:
# fetch the admissions dataset straight from its remote CSV
# NOTE(review): plain-http remote fetch -- confirm the host still serves this file
df = pd.read_csv(filepath_or_buffer="http://www.ats.ucla.edu/stat/data/binary.csv")
# quick look at what was loaded: first rows, (rows, columns) shape, and the
# column labels, each printed on its own line
print(df.head(), df.shape, df.columns, sep="\n")
In [ ]:
# normalize dataset
def normalize(series):
    """Min-max scale a numeric pandas Series onto the [0, 1] interval.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to rescale.

    Returns
    -------
    pandas.Series
        ``(series - min) / (max - min)`` with the original index preserved.
        When every value is identical the range is zero; in that case a
        float series of zeros is returned instead of the NaNs that 0/0
        would produce.
    """
    lowest = series.min()
    value_range = series.max() - lowest
    if value_range == 0:
        # Constant column: dividing by a zero range would yield NaN for every
        # row, which downstream estimators reject. Map everything to 0.0.
        return (series - lowest).astype(float)
    return (series - lowest) / value_range
# rescale both predictor columns in place so they share a common range
df["gre"] = normalize(df["gre"])
df["gpa"] = normalize(df["gpa"])
In [25]:
# hold out 33% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs
features = df[["gre", "gpa"]]
labels = df.loc[:, "admit"]
(features_train, features_test,
 labels_train, labels_test) = train_test_split(
    features,
    labels,
    test_size=0.33,
    random_state=22,
)
In [28]:
# build a support-vector classifier with default settings and train it on the
# training split (fit returns the estimator itself, so the calls can be chained)
clf = svm.SVC().fit(features_train, labels_train)
# predicted labels for the held-out rows
predictions = clf.predict(features_test)
# share of held-out rows predicted correctly; left as the final expression so
# the notebook displays the value
accuracy_score(y_true=labels_test, y_pred=predictions)
Out[28]:
In [1]:
# TODO: plot the decision boundary