This is a simple end-to-end example of how you can use SAS Viya for analysis. The example follows these steps:
In [ ]:
/* Point the session at the CAS controller, then start the CAS session. */
options cashost="localhost" casport=5570;
cas casauto;

/* Assign librefs for all caslibs available to the session. */
caslib _all_ assign;
In [ ]:
/* Base settings shared by the steps that follow. */
%let indata          = hmeq;
%let target          = bad;
%let class_inputs    = reason job;
%let interval_inputs = im_clage clno im_debtinc loan mortdue value im_yoj im_ninq derog im_delinq;

/* Derived lists. They are resolved when assigned, so they must come after
   the definitions they reference. */
%let class_vars = &target &class_inputs;
%let all_inputs = &interval_inputs &class_inputs;
In [ ]:
/* Fileref that reads the HMEQ sample CSV over HTTP. */
filename hmeq url 'http://support.sas.com/documentation/onlinedoc/viya/exampledatasets/hmeq.csv';

/* CAS-engine libref used for every in-memory table in this example. */
libname mycas cas;

/* Load the CSV into CAS as mycas.hmeq.
   REPLACE makes the cell safe to rerun in the same session: without it
   PROC IMPORT will not overwrite an existing output table. */
proc import file=hmeq out=mycas.hmeq dbms=csv replace;
run;
In [ ]:
/* Summarize every numeric column of the input table; results go to
   mycas.summary1. */
proc mdsummary data=mycas.&indata.;
  var _numeric_;
  output out=mycas.summary1;
run;

/* List the summary table. */
proc print data=mycas.summary1;
run;
In [ ]:
/* Bar chart of the _nmiss_ value for each _column_ in the summary table. */
ods graphics on;
proc sgplot data=mycas.summary1;
  vbar _column_ / response=_nmiss_;
run;
In [ ]:
/* Impute missing values for five interval inputs and write the result,
   together with all original columns, to mycas.out.
   The interval input list used by the model steps refers to the imputed
   columns as im_clage, im_delinq, im_ninq, im_debtinc and im_yoj. */
proc varimpute data=mycas.&indata.;
  input clage / ctech=mean;
  input delinq / ctech=median;
  input ninq / ctech=value cvalues=2;
  /* One constant per listed variable, in order: debtinc -> 35.0, yoj -> 7.
     The original list carried a third value (2) with no matching variable. */
  input debtinc yoj / ctech=value cvalues=35.0, 7;
  output out=mycas.out copyvars=(_all_);
run;
In [ ]:
/* Name of the partitioned table consumed by the model-training steps. */
%let part_data = hmeq_part;

/* Split the imputed table into partitions. PARTIND adds the _partind_
   indicator column that the model steps read through ROLEVAR=.
   SEED= fixes the random draw so the same split (and therefore comparable
   model results) is produced on every run.
   The target variable comes from &target rather than a hard-coded name so
   this step stays in sync with the rest of the example. */
proc partition data=mycas.out partind samppct=30 samppct2=10 seed=12345;
  target &target.;
  output out=mycas.&part_data copyvars=(_all_);
run;
In [ ]:
/* Create a Decision Tree.
   Trains on the partitioned table, stores the model in
   mycas.model_treesplit and the scored rows in mycas._scored_treesplit.
   _partind_ values 0/1/2 are mapped to train/validation/test. */
proc treesplit data=mycas.&part_data. outmodel=mycas.model_treesplit;
  autotune;
  target &target. / level=nominal;
  input &class_inputs. / level=nominal;
  input &interval_inputs. / level=interval;
  partition rolevar = _partind_(train='0' valid='1' test='2');
  output out=mycas._scored_treesplit copyvars=(_partind_ &target.);
  /* Label the output like the other model steps do, instead of showing
     no title or one left over from a previously run step. */
  title "Decision Tree";
run;
In [ ]:
/* Create Decision Trees with Gradient Boosting.
   Model is stored in mycas.model_gradboost; scored rows, with the partition
   indicator and target copied over, go to mycas._scored_gradboost. */
proc gradboost
    data=mycas.&part_data.
    maxdepth=8
    minleafsize=5
    seed=9878
    outmodel=mycas.model_gradboost;
  autotune;
  target &target. / level=nominal;
  input &class_inputs. / level=nominal;
  input &interval_inputs. / level=interval;
  /* _partind_ 0/1/2 -> train/validation/test */
  partition rolevar = _partind_(train='0' valid='1' test='2');
  output out=mycas._scored_gradboost copyvars=(_partind_ &target.);
  title "Gradient Boost";
run;
In [ ]:
/* Create a Random Forest.
   Model is stored in mycas.model_forest; scored rows go to
   mycas._scored_forest.
   SEED= is set (same value as the gradient boosting step) so the forest is
   reproducible from run to run, like the other seeded model steps. */
proc forest data=mycas.&part_data. ntrees=50 minleafsize=5 seed=9878
            outmodel=mycas.model_forest;
  autotune;
  target &target. / level=nominal;
  input &class_inputs. / level=nominal;
  input &interval_inputs. / level=interval;
  /* _partind_ 0/1/2 -> train/validation/test */
  partition rolevar = _partind_(train='0' valid='1' test='2');
  output out=mycas._scored_forest copyvars=(_partind_ &target);
  title "Random Forest";
run;
In [ ]:
/* Create a Neural Network (MLP architecture, one HIDDEN statement of 7).
   Model is stored in mycas.model_neural; scored rows go to
   mycas._scored_neural. */
proc nnet data=mycas.&part_data.;
  autotune;
  architecture mlp;
  target &target. / level=nominal;
  input &class_inputs. / level=nominal;
  input &interval_inputs. / level=interval;
  hidden 7;
  optimization algorithm=lbfgs maxiter=300;
  /* _partind_ 0/1/2 -> train/validation/test */
  partition rolevar = _partind_(train='0' valid='1' test='2');
  train outmodel=mycas.model_neural seed=12345 numtries=3;
  output out=mycas._scored_neural copyvars=(_partind_ &target);
  title "Neural Network";
run;
In [ ]:
/* Macro to assess the different models.
     prefix   - model tag: reads mycas._scored_<prefix> and writes
                work.<prefix>_fitstat, _rocinfo and _liftinfo
     var_evt  - variable passed to the PROC ASSESS INPUT statement
     var_nevt - variable passed to FITSTAT PVAR=
   Assessment is run separately for each _partind_ value (BY statement). */
%macro assess_model(prefix=, var_evt=, var_nevt=);
  proc assess data=mycas._scored_&prefix. nbins=20;
    input &var_evt.;
    target &target. / level=nominal event='1';
    fitstat pvar=&var_nevt. / pevent='0';
    by _partind_;
    ods output
      fitstat=work.&prefix._fitstat
      rocinfo=work.&prefix._rocinfo
      liftinfo=work.&prefix._liftinfo;
  run;
%mend assess_model;
/* Run the assessment once per scored model, with a matching title. */
title "Assess Forest";
%assess_model(prefix=forest, var_evt=P_BAD1, var_nevt=P_BAD0);

title "Assess Gradient Boost";
%assess_model(prefix=gradboost, var_evt=P_BAD1, var_nevt=P_BAD0);

title "Assess Decision Tree";
%assess_model(prefix=treesplit, var_evt=P_BAD1, var_nevt=P_BAD0);

title "Assess Neural";
%assess_model(prefix=neural, var_evt=P_BAD1, var_nevt=P_BAD0);
/* Prepare ROC and Lift Data Sets for Plotting.
   Stack the per-model ROC tables and tag each row with its model name.
   The IN= flags identify which input table the current row came from. */
data work.all_rocinfo;
  set work.neural_rocinfo   (keep=sensitivity fpr _partind_ in=n)
      work.forest_rocinfo   (keep=sensitivity fpr _partind_ in=f)
      work.treesplit_rocinfo(keep=sensitivity fpr _partind_ in=t)
      work.gradboost_rocinfo(keep=sensitivity fpr _partind_ in=g);
  length model $ 16;
  /* Exactly one IN= flag is set for any row. */
  if n then model='Neural';
  else if f then model='Forest';
  else if g then model='GradientBoosting';
  else if t then model='TreeSplit';
run;
/* Stack the per-model lift tables and tag each row with its model name.
   The IN= flags identify which input table the current row came from. */
data work.all_liftinfo;
  set work.neural_liftinfo   (keep=depth lift cumlift _partind_ in=n)
      work.forest_liftinfo   (keep=depth lift cumlift _partind_ in=f)
      work.treesplit_liftinfo(keep=depth lift cumlift _partind_ in=t)
      work.gradboost_liftinfo(keep=depth lift cumlift _partind_ in=g);
  length model $ 16;
  /* Exactly one IN= flag is set for any row. */
  if n then model='Neural';
  else if f then model='Forest';
  else if g then model='GradientBoosting';
  else if t then model='TreeSplit';
run;
In [ ]:
/* Plot ROC Curves: one series per model, plus a slope-1 reference line
   through the origin. */
ods graphics on;

/* _partind_=2 specifies the test partition */
proc sgplot data=work.all_rocinfo(where=(_partind_=2)) aspect=1;
  title "ROC Curves for Test Partition (Unbiased)";
  series x=fpr y=sensitivity / group=model;
  lineparm x=0 y=0 slope=1 / transparency=.7;
  xaxis values=(0 to 1 by 0.25) grid offsetmin=.05 offsetmax=.05;
  yaxis values=(0 to 1 by 0.25) grid offsetmin=.05 offsetmax=.05;
run;
In [ ]:
/* Plot Lift: cumulative lift against depth, one marked series per model,
   restricted to rows where _partind_=2. */
proc sgplot data=work.all_liftinfo(where=(_partind_=2));
  title "Cumulative Lift Chart for Test Partition (Unbiased)";
  xaxis label="Percentile" grid;
  series x=depth y=CumLift /
    group=model
    markers
    markerattrs=(symbol=circlefilled);
run;
In [ ]:
/* Create Fit Statistics.
   print_fitstats lists work.<prefix>_fitstat for the given model tag. */
%macro print_fitstats(prefix=);
  proc print data=work.&prefix._fitstat;
  run;
%mend print_fitstats;

/* One listing per model, each under its own title. */
title "Forest Fit Statistics";
%print_fitstats(prefix=forest);

title "Gradient Boosting Fit Statistics";
%print_fitstats(prefix=gradboost);

title "Neural Fit Statistics";
%print_fitstats(prefix=neural);

title "TreeSplit Fit Statistics";
%print_fitstats(prefix=treesplit);
In [ ]: