SVM Feature Selected Classifiers without gender/ethnicity

In order to enable visualizing factors that school districts could readily change versus would be more difficult to change, we needed two more runs of our best models - SVM Feature Selected without gender and ethnicity features for classifying high graduation and low graduation.

SVM Feature Selected Classifiers without gender/ethnicity
- 1. High Graduation - No Gender/Ethnicity
- 2. Low Graduation - No Gender/Ethnicity



In [1]:

    
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")



In [2]:

    
dftouse=pd.read_csv("data/finaldata/dftouse.csv")



In [3]:

    
STANDARDIZABLE = ['num_students', 'num_schools','num_charter_schools','num_pub_schools','tcuresal_percent','pupil_teacher_ratio_dist', 'pupil_teacher_ratio_ps', 'totalrev_pp','tlocrev_pp','tsrev_pp','tfedrev_pp','tcurinst_pp','tcurssv_pp','tcuroth_pp','tcursalary_pp','tcurbenefits_pp','totalexp_pp','tcapout_pp','tnonelse_pp','tcurelsc_pp','instexp_pp','tcurinst_percent','tcuroth_percent','tcurelsc_percent','tcurssvc_percent','tfedrev_percent','tlocrev_percent','tsrev_percent','r_ELL','r_IEP','r_lunch_free','r_lunch_reduced','r_stud_PKK','r_stud_18','r_stud_912','r_st_PKT','r_st_KT','r_st_ET','r_st_ST','r_st_UT','r_st_TS','r_st_IA','r_st_IC','r_st_EGC','r_st_SGC','r_st_OGC','r_st_TGC','r_st_LMS','r_st_LMSS','r_st_LEA','r_st_LEASS','r_st_SA','r_st_SASS','r_st_SSSS','r_st_OSSS','r_lrev_pt','r_lrev_gst','r_lrev_put','r_lrev_it','r_lrev_aot','r_lrev_pgc','r_lrev_cc','r_lrev_oss','r_lrev_tui','r_lrev_trans','r_lrev_slr','r_lrev_ts','r_lrev_sar','r_lrev_osalserv','r_lrev_sfns','r_lrev_ie','r_lrev_molr','r_lrev_sp','r_lrev_rr','r_lrev_sale','r_lrev_ff','r_lrev_pc','r_srev_gfa','r_srev_sep','r_srev_trans','r_srev_sip','r_srev_cbsp','r_srev_vep','r_srev_codsp','r_srev_bep','r_srev_gt','r_srev_slp','r_srev_aor','r_srev_splea','r_srev_osp','r_srev_ns','r_frev_title1','r_frev_dis','r_frev_cna','r_frev_ems','r_frev_dfs','r_frev_voc','r_frev_ao','r_frev_ns','r_frev_ia','r_frev_be','r_frev_na','r_frev_aofed']
print STANDARDIZABLE









    



['num_students', 'num_schools', 'num_charter_schools', 'num_pub_schools', 'tcuresal_percent', 'pupil_teacher_ratio_dist', 'pupil_teacher_ratio_ps', 'totalrev_pp', 'tlocrev_pp', 'tsrev_pp', 'tfedrev_pp', 'tcurinst_pp', 'tcurssv_pp', 'tcuroth_pp', 'tcursalary_pp', 'tcurbenefits_pp', 'totalexp_pp', 'tcapout_pp', 'tnonelse_pp', 'tcurelsc_pp', 'instexp_pp', 'tcurinst_percent', 'tcuroth_percent', 'tcurelsc_percent', 'tcurssvc_percent', 'tfedrev_percent', 'tlocrev_percent', 'tsrev_percent', 'r_ELL', 'r_IEP', 'r_lunch_free', 'r_lunch_reduced', 'r_stud_PKK', 'r_stud_18', 'r_stud_912', 'r_st_PKT', 'r_st_KT', 'r_st_ET', 'r_st_ST', 'r_st_UT', 'r_st_TS', 'r_st_IA', 'r_st_IC', 'r_st_EGC', 'r_st_SGC', 'r_st_OGC', 'r_st_TGC', 'r_st_LMS', 'r_st_LMSS', 'r_st_LEA', 'r_st_LEASS', 'r_st_SA', 'r_st_SASS', 'r_st_SSSS', 'r_st_OSSS', 'r_lrev_pt', 'r_lrev_gst', 'r_lrev_put', 'r_lrev_it', 'r_lrev_aot', 'r_lrev_pgc', 'r_lrev_cc', 'r_lrev_oss', 'r_lrev_tui', 'r_lrev_trans', 'r_lrev_slr', 'r_lrev_ts', 'r_lrev_sar', 'r_lrev_osalserv', 'r_lrev_sfns', 'r_lrev_ie', 'r_lrev_molr', 'r_lrev_sp', 'r_lrev_rr', 'r_lrev_sale', 'r_lrev_ff', 'r_lrev_pc', 'r_srev_gfa', 'r_srev_sep', 'r_srev_trans', 'r_srev_sip', 'r_srev_cbsp', 'r_srev_vep', 'r_srev_codsp', 'r_srev_bep', 'r_srev_gt', 'r_srev_slp', 'r_srev_aor', 'r_srev_splea', 'r_srev_osp', 'r_srev_ns', 'r_frev_title1', 'r_frev_dis', 'r_frev_cna', 'r_frev_ems', 'r_frev_dfs', 'r_frev_voc', 'r_frev_ao', 'r_frev_ns', 'r_frev_ia', 'r_frev_be', 'r_frev_na', 'r_frev_aofed']



In [4]:

    
INDICATORS = []
for v in dftouse.columns:
    l=np.unique(dftouse[v])
    if len(l) <= 10:
        INDICATORS.append(v)
        
INDICATORS.remove('RESP_High_Graduation')        
INDICATORS.remove('RESP_Low_Graduation')  
print INDICATORS









    



['i_agency_type_local_school_district', 'i_agency_type_local_school_district_sup_union', 'i_agency_type_regional_education_services', 'i_agency_type_charter_school_agency', 'i_fin_sdlc_sec', 'i_fin_sdlc_elem_sec', 'i_fin_sdlc_voc', 'i_ucl_city_large', 'i_ucl_city_mid', 'i_ucl_city_small', 'i_ucl_suburb_large', 'i_ucl_suburb_mid', 'i_ucl_suburb_small', 'i_ucl_town_fringe', 'i_ucl_town_distant', 'i_ucl_town_remote', 'i_ucl_rural_fringe', 'i_ucl_rural_distant', 'i_ucl_rural_remote', 'i_cs_all_charter', 'i_cs_charter_noncharter', 'i_cs_all_noncharter', 'i_ma_ne_nr', 'i_ma_metropolitan', 'i_ma_micropolitan', 'i_lgo_K', 'i_lgo_PK']



In [5]:

    
#CITATION: From HW3
from sklearn.cross_validation import train_test_split
itrain, itest = train_test_split(xrange(dftouse.shape[0]), train_size=0.7)



In [6]:

    
#CITATION: From HW3
mask=np.ones(dftouse.shape[0], dtype='int')
mask[itrain]=1
mask[itest]=0
mask = (mask==1)



In [8]:

    
lcols=STANDARDIZABLE+INDICATORS
Xmatrix=dftouse[lcols].values
Yresp=dftouse['RESP_High_Graduation'].values
#CITATION: From HW3
Xmatrix_train=Xmatrix[mask]
Xmatrix_test=Xmatrix[~mask]
Yresp_train=Yresp[mask]
Yresp_test=Yresp[~mask]



In [9]:

    
# make sure we didn't get unlucky in our mask selection
print "% High_Graduation in Training:", np.mean(dftouse['RESP_High_Graduation'][mask])
print "% High_Graduation in Testing:", np.mean(dftouse['RESP_High_Graduation'][~mask])
print "% Low_Graduation in Training:", np.mean(dftouse['RESP_Low_Graduation'][mask])
print "% Low_Graduation in Testing:", np.mean(dftouse['RESP_Low_Graduation'][~mask])









    



% High_Graduation in Training: 0.247979864883
% High_Graduation in Testing: 0.251545117429
% Low_Graduation in Training: 0.249834415154
% Low_Graduation in Testing: 0.253708281829



In [10]:

    
#CITATION: From HW3
from sklearn.preprocessing import StandardScaler

for col in STANDARDIZABLE:
    #print col
    valstrain=dftouse[col].values[mask]
    valstest=dftouse[col].values[~mask]
    scaler=StandardScaler().fit(valstrain)
    outtrain=scaler.transform(valstrain)
    outtest=scaler.fit_transform(valstest)
    out=np.empty(mask.shape[0])
    out[mask]=outtrain
    out[~mask]=outtest
    dftouse[col]=out



In [12]:

    
#CITATION: From HW3
from sklearn.grid_search import GridSearchCV



In [13]:

    
#CITATION: From HW3
def cv_optimize(clf, parameters, X, y, n_folds=5, score_func=None):
    if score_func:
        gs = GridSearchCV(clf, param_grid=parameters, cv=n_folds, scoring=score_func)
    else:
        gs = GridSearchCV(clf, param_grid=parameters, cv=n_folds)
    gs.fit(X, y)
    print "BEST", gs.best_params_, gs.best_score_, gs.grid_scores_
    best = gs.best_estimator_
    return best



In [14]:

    
#CITATION: From HW3
from sklearn.metrics import confusion_matrix
def do_classify(clf, parameters, indf, featurenames, targetname, target1val, mask=None, reuse_split=None, score_func=None, n_folds=5):
    subdf=indf[featurenames]
    X=subdf.values
    y=(indf[targetname].values==target1val)*1
    if mask !=None:
        print "using mask"
        Xtrain, Xtest, ytrain, ytest = X[mask], X[~mask], y[mask], y[~mask]
    if reuse_split !=None:
        print "using reuse split"
        Xtrain, Xtest, ytrain, ytest = reuse_split['Xtrain'], reuse_split['Xtest'], reuse_split['ytrain'], reuse_split['ytest']
    if parameters:
        clf = cv_optimize(clf, parameters, Xtrain, ytrain, n_folds=n_folds, score_func=score_func)
    clf=clf.fit(Xtrain, ytrain)
    training_accuracy = clf.score(Xtrain, ytrain)
    test_accuracy = clf.score(Xtest, ytest)
    print "############# based on standard predict ################"
    print "Accuracy on training data: %0.2f" % (training_accuracy)
    print "Accuracy on test data:     %0.2f" % (test_accuracy)
    print confusion_matrix(ytest, clf.predict(Xtest))
    print "########################################################"
    return clf, Xtrain, ytrain, Xtest, ytest



In [15]:

    
#CITATION: From HW3
def nonzero_lasso(clf):
    featuremask=(clf.coef_ !=0.0)[0]
    return pd.DataFrame(dict(feature=lcols, coef=clf.coef_[0], abscoef=np.abs(clf.coef_[0])))[featuremask].sort('abscoef', ascending=False)

High Graduation - No Gender/Ethnicity



In [16]:

    
from scipy.stats.stats import pearsonr



In [17]:

    
#CITATION: From HW3

correlations=[]
dftousetrain=dftouse[mask]
for col in lcols:
    r=pearsonr(dftousetrain[col], dftousetrain['RESP_High_Graduation'])[0]
    correlations.append(dict(feature=col,corr=r, abscorr=np.abs(r)))

bpdf=pd.DataFrame(correlations).sort('abscorr', ascending=False)
bpdf.set_index(['feature'], inplace=True)
bpdf.head(25)









    Out[17]:






  
    
      
      abscorr
      corr
    
    
      feature
      
      
    
  
  
    
      r_lunch_free
      0.300986
      -0.300986
    
    
      tlocrev_percent
      0.219284
      0.219284
    
    
      r_stud_18
      0.215640
      -0.215640
    
    
      r_stud_912
      0.213236
      0.213236
    
    
      tfedrev_percent
      0.195283
      -0.195283
    
    
      tlocrev_pp
      0.179521
      0.179521
    
    
      tsrev_percent
      0.166873
      -0.166873
    
    
      r_frev_dis
      0.166860
      0.166860
    
    
      r_frev_title1
      0.154755
      -0.154755
    
    
      r_frev_cna
      0.136677
      -0.136677
    
    
      r_srev_sep
      0.123350
      0.123350
    
    
      i_ucl_suburb_large
      0.116087
      0.116087
    
    
      tcuroth_percent
      0.113994
      -0.113994
    
    
      r_srev_gfa
      0.110739
      -0.110739
    
    
      r_ELL
      0.104733
      -0.104733
    
    
      tcurinst_pp
      0.097023
      0.097023
    
    
      instexp_pp
      0.097023
      0.097023
    
    
      num_pub_schools
      0.094597
      -0.094597
    
    
      num_schools
      0.094456
      -0.094456
    
    
      r_lunch_reduced
      0.094145
      -0.094145
    
    
      num_students
      0.080523
      -0.080523
    
    
      i_fin_sdlc_elem_sec
      0.077957
      -0.077957
    
    
      tcursalary_pp
      0.076917
      0.076917
    
    
      r_stud_PKK
      0.075947
      -0.075947
    
    
      tcurelsc_pp
      0.075150
      0.075150



In [18]:

    
#CITATION: From HW3
from sklearn import feature_selection
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest



In [19]:

    
#CITATION: From HW3
def pearson_scorer(X,y):
    rs=np.zeros(X.shape[1])
    pvals=np.zeros(X.shape[1])
    i=0
    for v in X.T:
        rs[i], pvals[i]=pearsonr(v, y)
        i=i+1
    return np.abs(rs), pvals



In [21]:

    
#CITATION: From HW3
from sklearn.svm import LinearSVC



In [22]:

    
#CITATION: From HW3
selectorlinearsvm = SelectKBest(k=25, score_func=pearson_scorer)
pipelinearsvm = Pipeline([('select', selectorlinearsvm), ('svm', LinearSVC(loss="hinge"))])



In [28]:

    
%%time
pipelinearsvm, Xtrain, ytrain, Xtest, ytest  = do_classify(pipelinearsvm, {"svm__C": [0.00001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]}, dftouse,lcols, 'RESP_High_Graduation',1, mask=mask)
#CITATION: From HW3









    



using mask
BEST {'svm__C': 1.0} 0.761955225858 [mean: 0.75162, std: 0.01238, params: {'svm__C': 1e-05}, mean: 0.75348, std: 0.00289, params: {'svm__C': 0.001}, mean: 0.75599, std: 0.00278, params: {'svm__C': 0.01}, mean: 0.75891, std: 0.00513, params: {'svm__C': 0.1}, mean: 0.76196, std: 0.00542, params: {'svm__C': 1.0}, mean: 0.75758, std: 0.00536, params: {'svm__C': 10.0}, mean: 0.74606, std: 0.01885, params: {'svm__C': 100.0}]
############# based on standard predict ################
Accuracy on training data: 0.76
Accuracy on test data:     0.76
[[2399   23]
 [ 755   59]]
########################################################
CPU times: user 9.17 s, sys: 110 ms, total: 9.28 s
Wall time: 9.35 s






    



/Users/ChaserAcer/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:7: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.



In [29]:

    
#CITATION: From HW3
np.array(lcols)[pipelinearsvm.get_params()['select'].get_support()]









    Out[29]:





array(['num_students', 'num_schools', 'num_pub_schools', 'tlocrev_pp',
       'tcurinst_pp', 'tcursalary_pp', 'tcurelsc_pp', 'instexp_pp',
       'tcuroth_percent', 'tfedrev_percent', 'tlocrev_percent',
       'tsrev_percent', 'r_ELL', 'r_lunch_free', 'r_lunch_reduced',
       'r_stud_PKK', 'r_stud_18', 'r_stud_912', 'r_srev_gfa', 'r_srev_sep',
       'r_frev_title1', 'r_frev_dis', 'r_frev_cna', 'i_fin_sdlc_elem_sec',
       'i_ucl_suburb_large'], 
      dtype='|S45')



In [30]:

    
#CITATION: From HW3
from sklearn.metrics import roc_curve, auc
def make_roc(name, clf, ytest, xtest, ax=None, labe=5, proba=True, skip=0):
    initial=False
    if not ax:
        ax=plt.gca()
        initial=True
    if proba:#for stuff like logistic regression
        fpr, tpr, thresholds=roc_curve(ytest, clf.predict_proba(xtest)[:,1])
    else:#for stuff like SVM
        fpr, tpr, thresholds=roc_curve(ytest, clf.decision_function(xtest))
    roc_auc = auc(fpr, tpr)
    if skip:
        l=fpr.shape[0]
        ax.plot(fpr[0:l:skip], tpr[0:l:skip], '.-', alpha=0.3, label='ROC curve for %s (area = %0.2f)' % (name, roc_auc))
    else:
        ax.plot(fpr, tpr, '.-', alpha=0.3, label='ROC curve for %s (area = %0.2f)' % (name, roc_auc))
    label_kwargs = {}
    label_kwargs['bbox'] = dict(
        boxstyle='round,pad=0.3', alpha=0.2,
    )
    if labe!=None:
        for k in xrange(0, fpr.shape[0],labe):
            #from https://gist.github.com/podshumok/c1d1c9394335d86255b8
            threshold = str(np.round(thresholds[k], 2))
            ax.annotate(threshold, (fpr[k], tpr[k]), **label_kwargs)
    if initial:
        ax.plot([0, 1], [0, 1], 'k--')
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title('ROC')
    ax.legend(loc="lower right")
    return ax



In [32]:

    
#CITATION: From HW3
with sns.color_palette("dark"):
    ax=make_roc("svm-feature-selected",pipelinearsvm, ytest, Xtest, labe=200, skip=50, proba=False)

Low Graduation - No Gender/Ethnicity



In [33]:

    
Xmatrix=dftouse[lcols].values
Yresp=dftouse['RESP_Low_Graduation'].values



In [34]:

    
#CITATION: From HW3
correlations=[]
dftousetrain=dftouse[mask]
for col in lcols:
    r=pearsonr(dftousetrain[col], dftousetrain['RESP_Low_Graduation'])[0]
    correlations.append(dict(feature=col,corr=r, abscorr=np.abs(r)))

bpdf=pd.DataFrame(correlations).sort('abscorr', ascending=False)
bpdf.set_index(['feature'], inplace=True)
bpdf.head(25)









    Out[34]:






  
    
      
      abscorr
      corr
    
    
      feature
      
      
    
  
  
    
      r_lunch_free
      0.431374
      0.431374
    
    
      tfedrev_percent
      0.309397
      0.309397
    
    
      tlocrev_percent
      0.250340
      -0.250340
    
    
      tcurinst_percent
      0.174324
      -0.174324
    
    
      r_frev_title1
      0.170846
      0.170846
    
    
      r_ELL
      0.162813
      0.162813
    
    
      tfedrev_pp
      0.156278
      0.156278
    
    
      tsrev_percent
      0.152960
      0.152960
    
    
      tcurssvc_percent
      0.144028
      0.144028
    
    
      r_lrev_gst
      0.141805
      0.141805
    
    
      tlocrev_pp
      0.133834
      -0.133834
    
    
      r_frev_dis
      0.131028
      -0.131028
    
    
      i_cs_all_noncharter
      0.130854
      -0.130854
    
    
      num_pub_schools
      0.130399
      0.130399
    
    
      num_schools
      0.130182
      0.130182
    
    
      r_lrev_molr
      0.126038
      0.126038
    
    
      i_ucl_city_large
      0.126009
      0.126009
    
    
      i_agency_type_charter_school_agency
      0.120099
      0.120099
    
    
      i_cs_all_charter
      0.120099
      0.120099
    
    
      tsrev_pp
      0.119239
      0.119239
    
    
      r_srev_cbsp
      0.117289
      0.117289
    
    
      num_students
      0.106727
      0.106727
    
    
      i_agency_type_local_school_district
      0.100594
      -0.100594
    
    
      r_lrev_it
      0.099386
      -0.099386
    
    
      tcuroth_percent
      0.097678
      0.097678



In [35]:

    
#CITATION: From HW3
selectorlinearsvm = SelectKBest(k=25, score_func=pearson_scorer)
pipelinearsvm = Pipeline([('select', selectorlinearsvm), ('svm', LinearSVC(loss="hinge"))])



In [37]:

    
%%time
pipelinearsvm, Xtrain, ytrain, Xtest, ytest  = do_classify(pipelinearsvm, {"svm__C": [0.00001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]}, dftouse,lcols, 'RESP_Low_Graduation',1, mask=mask)
#CITATION: From HW3









    



using mask
BEST {'svm__C': 1.0} 0.792820241092 [mean: 0.78169, std: 0.00786, params: {'svm__C': 1e-05}, mean: 0.78050, std: 0.00383, params: {'svm__C': 0.001}, mean: 0.79176, std: 0.00184, params: {'svm__C': 0.01}, mean: 0.79216, std: 0.00221, params: {'svm__C': 0.1}, mean: 0.79282, std: 0.00218, params: {'svm__C': 1.0}, mean: 0.79176, std: 0.00391, params: {'svm__C': 10.0}, mean: 0.76858, std: 0.01030, params: {'svm__C': 100.0}]
############# based on standard predict ################
Accuracy on training data: 0.79
Accuracy on test data:     0.78
[[2327   88]
 [ 617  204]]
########################################################
CPU times: user 8.27 s, sys: 138 ms, total: 8.4 s
Wall time: 8.54 s






    



/Users/ChaserAcer/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:7: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.



In [38]:

    
#CITATION: From HW3
np.array(lcols)[pipelinearsvm.get_params()['select'].get_support()]









    Out[38]:





array(['num_students', 'num_schools', 'num_pub_schools', 'tlocrev_pp',
       'tsrev_pp', 'tfedrev_pp', 'tcurinst_percent', 'tcuroth_percent',
       'tcurssvc_percent', 'tfedrev_percent', 'tlocrev_percent',
       'tsrev_percent', 'r_ELL', 'r_lunch_free', 'r_lrev_gst', 'r_lrev_it',
       'r_lrev_molr', 'r_srev_cbsp', 'r_frev_title1', 'r_frev_dis',
       'i_agency_type_local_school_district',
       'i_agency_type_charter_school_agency', 'i_ucl_city_large',
       'i_cs_all_charter', 'i_cs_all_noncharter'], 
      dtype='|S45')



In [39]:

    
with sns.color_palette("dark"):
    ax=make_roc("svm-feature-selected",pipelinearsvm, ytest, Xtest, labe=200, skip=50, proba=False)

	abscorr	corr
feature
r_lunch_free	0.300986	-0.300986
tlocrev_percent	0.219284	0.219284
r_stud_18	0.215640	-0.215640
r_stud_912	0.213236	0.213236
tfedrev_percent	0.195283	-0.195283
tlocrev_pp	0.179521	0.179521
tsrev_percent	0.166873	-0.166873
r_frev_dis	0.166860	0.166860
r_frev_title1	0.154755	-0.154755
r_frev_cna	0.136677	-0.136677
r_srev_sep	0.123350	0.123350
i_ucl_suburb_large	0.116087	0.116087
tcuroth_percent	0.113994	-0.113994
r_srev_gfa	0.110739	-0.110739
r_ELL	0.104733	-0.104733
tcurinst_pp	0.097023	0.097023
instexp_pp	0.097023	0.097023
num_pub_schools	0.094597	-0.094597
num_schools	0.094456	-0.094456
r_lunch_reduced	0.094145	-0.094145
num_students	0.080523	-0.080523
i_fin_sdlc_elem_sec	0.077957	-0.077957
tcursalary_pp	0.076917	0.076917
r_stud_PKK	0.075947	-0.075947
tcurelsc_pp	0.075150	0.075150

	abscorr	corr
feature
r_lunch_free	0.431374	0.431374
tfedrev_percent	0.309397	0.309397
tlocrev_percent	0.250340	-0.250340
tcurinst_percent	0.174324	-0.174324
r_frev_title1	0.170846	0.170846
r_ELL	0.162813	0.162813
tfedrev_pp	0.156278	0.156278
tsrev_percent	0.152960	0.152960
tcurssvc_percent	0.144028	0.144028
r_lrev_gst	0.141805	0.141805
tlocrev_pp	0.133834	-0.133834
r_frev_dis	0.131028	-0.131028
i_cs_all_noncharter	0.130854	-0.130854
num_pub_schools	0.130399	0.130399
num_schools	0.130182	0.130182
r_lrev_molr	0.126038	0.126038
i_ucl_city_large	0.126009	0.126009
i_agency_type_charter_school_agency	0.120099	0.120099
i_cs_all_charter	0.120099	0.120099
tsrev_pp	0.119239	0.119239
r_srev_cbsp	0.117289	0.117289
num_students	0.106727	0.106727
i_agency_type_local_school_district	0.100594	-0.100594
r_lrev_it	0.099386	-0.099386
tcuroth_percent	0.097678	0.097678

SVM Feature Selected Classifiers without gender/ethnicity

Table of Contents

High Graduation - No Gender/Ethnicity

Low Graduation - No Gender/Ethnicity