Predicting admission based on GRE and GPA scores, using a Support Vector Machine


In [2]:
# loading necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score

try:
    # train_test_split lives in model_selection since scikit-learn 0.18
    from sklearn.model_selection import train_test_split
except ImportError:
    # fall back to the legacy module (removed in scikit-learn 0.20)
    from sklearn.cross_validation import train_test_split

In [6]:
# Read the admissions table directly from the hosted CSV file.
# NOTE(review): this is a plain-http UCLA ATS address; it may have moved since
# the notebook was written -- confirm it still resolves before a fresh run.
DATA_URL = "http://www.ats.ucla.edu/stat/data/binary.csv"
df = pd.read_csv(DATA_URL)

# Sanity check, printed in order: first rows, (rows, columns), column index.
for summary in (df.head(), df.shape, df.columns):
    print(summary)


   admit  gre   gpa  rank
0      0  380  3.61     3
1      1  660  3.67     3
2      1  800  4.00     1
3      1  640  3.19     4
4      0  520  2.93     4
(400, 4)
Index([u'admit', u'gre', u'gpa', u'rank'], dtype='object')

In [ ]:
# normalize dataset
def normalize(series):
    """Min-max scale ``series`` so its smallest value maps to 0 and its largest to 1.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to rescale. Missing values stay missing.

    Returns
    -------
    pandas.Series
        ``(series - min) / (max - min)``. When every value is identical the
        span is zero; a float series of zeros is returned in that case
        instead of the all-NaN result that 0/0 would produce.
    """
    lowest = series.min()
    span = series.max() - lowest
    shifted = series - lowest

    # Only a scalar span can be compared to zero directly; for a non-scalar
    # span (e.g. a DataFrame was passed) keep the plain arithmetic unchanged.
    if getattr(span, "ndim", 0) == 0 and span == 0:
        return shifted.astype(float)

    return shifted / span

# Rescale both score columns in a single statement: DataFrame.apply passes
# each selected column to normalize as a Series and the results are written
# back under the same column names.
# NOTE(review): min/max are taken over every row, before the train/test split
# further down, so held-out rows influence the scaling -- confirm that is OK.
df[["gre", "gpa"]] = df[["gre", "gpa"]].apply(normalize)

In [25]:
# Separate model inputs from the target, then hold out part of the rows.
# Inputs: the two score columns. Target: the admit column.
features = df[['gre', 'gpa']]
labels = df.loc[:, 'admit']

(features_train, features_test,
 labels_train, labels_test) = train_test_split(
    features,
    labels,
    test_size=0.33,    # about one third of the rows goes to the test set
    random_state=22,   # fixed seed so the same split is produced on re-run
)

In [28]:
# Build a support vector classifier with the library's default settings and
# train it on the training split (fit returns the estimator, so it chains).
clf = svm.SVC().fit(features_train, labels_train)

# Predicted labels for the held-out rows.
predictions = clf.predict(features_test)

# Share of held-out rows predicted correctly; being the last expression,
# this value is what the cell displays.
accuracy_score(y_true=labels_test, y_pred=predictions)


Out[28]:
0.71212121212121215

In [1]:
# TODO: plot the decision boundary