# Running a linear regression using Java

In [1]:
/******************************************************************************
*  Compilation:  javac LinearRegression.java
*  Execution:    java  LinearRegression
*  Dependencies: none
*
*  Compute least squares solution to y = beta * x + alpha.
*  Simple linear regression.
*
******************************************************************************/

/**
*  The {@code LinearRegression} class performs a simple linear regression
*  on a set of <em>n</em> data points (<em>y<sub>i</sub></em>, <em>x<sub>i</sub></em>).
*  That is, it fits a straight line <em>y</em> = &alpha; + &beta; <em>x</em>,
*  (where <em>y</em> is the response variable, <em>x</em> is the predictor variable,
*  &alpha; is the <em>y-intercept</em>, and &beta; is the <em>slope</em>)
*  that minimizes the sum of squared residuals of the linear regression model.
*  It also computes associated statistics, including the coefficient of
*  determination <em>R</em><sup>2</sup> and the standard deviation of the
*  estimates for the slope and <em>y</em>-intercept.
*
*  @author Robert Sedgewick
*  @author Kevin Wayne
*/
public class LinearRegression {
    // Fitted coefficients of the line y = intercept + slope * x.
    private final double intercept, slope;
    // Coefficient of determination (regression sum of squares / total sum of squares).
    private final double r2;
    // Estimated variances of the intercept (svar0) and slope (svar1) estimates;
    // the public accessors return their square roots.
    private final double svar0, svar1;

    /**
     * Performs a linear regression on the data points {@code (y[i], x[i])}.
     * <p>
     * No validation beyond the length check is performed. With fewer than two
     * data points the slope is {@code 0/0 = NaN}; with exactly two points the
     * residual variance is divided by zero degrees of freedom, so the standard
     * errors are not finite. Such inputs yield non-finite statistics rather
     * than an exception.
     *
     * @param  x the values of the predictor variable
     * @param  y the corresponding values of the response variable
     * @throws IllegalArgumentException if the lengths of the two arrays are not equal
     * @throws NullPointerException if either array is {@code null}
     */
    public LinearRegression(double[] x, double[] y) {
        if (x.length != y.length) {
            throw new IllegalArgumentException("array lengths are not equal");
        }
        int n = x.length;

        // first pass: sample means of x and y
        double sumx = 0.0, sumy = 0.0;
        for (int i = 0; i < n; i++) {
            sumx += x[i];
            sumy += y[i];
        }
        double xbar = sumx / n;
        double ybar = sumy / n;

        // second pass: centered sums of squares and cross-products
        double xxbar = 0.0, yybar = 0.0, xybar = 0.0;
        for (int i = 0; i < n; i++) {
            double dx = x[i] - xbar;
            double dy = y[i] - ybar;
            xxbar += dx * dx;
            yybar += dy * dy;
            xybar += dx * dy;
        }
        slope     = xybar / xxbar;
        intercept = ybar - slope * xbar;

        // third pass: goodness-of-fit statistics
        double rss = 0.0;      // residual sum of squares
        double ssr = 0.0;      // regression sum of squares
        for (int i = 0; i < n; i++) {
            double fit = slope * x[i] + intercept;
            rss += (fit - y[i]) * (fit - y[i]);
            ssr += (fit - ybar) * (fit - ybar);
        }

        // two parameters (slope and intercept) were estimated from the data
        int degreesOfFreedom = n - 2;
        r2 = ssr / yybar;
        double svar = rss / degreesOfFreedom;   // residual variance estimate
        svar1 = svar / xxbar;
        svar0 = svar / n + xbar * xbar * svar1;
    }

    /**
     * Returns the <em>y</em>-intercept &alpha; of the best-fit line <em>y</em> = &alpha; + &beta; <em>x</em>.
     *
     * @return the <em>y</em>-intercept &alpha; of the best-fit line <em>y = &alpha; + &beta; x</em>
     */
    public double intercept() {
        return intercept;
    }

    /**
     * Returns the slope &beta; of the best-fit line <em>y</em> = &alpha; + &beta; <em>x</em>.
     *
     * @return the slope &beta; of the best-fit line <em>y</em> = &alpha; + &beta; <em>x</em>
     */
    public double slope() {
        return slope;
    }

    /**
     * Returns the coefficient of determination <em>R</em><sup>2</sup>.
     *
     * @return the coefficient of determination <em>R</em><sup>2</sup>,
     *         which is a real number between 0 and 1
     */
    public double R2() {
        return r2;
    }

    /**
     * Returns the standard error of the estimate for the intercept.
     *
     * @return the standard error of the estimate for the intercept
     */
    public double interceptStdErr() {
        return Math.sqrt(svar0);
    }

    /**
     * Returns the standard error of the estimate for the slope.
     *
     * @return the standard error of the estimate for the slope
     */
    public double slopeStdErr() {
        return Math.sqrt(svar1);
    }

    /**
     * Returns the expected response {@code y} given the value of the predictor
     * variable {@code x}.
     *
     * @param  x the value of the predictor variable
     * @return the expected response {@code y} given the value of the predictor
     *         variable {@code x}
     */
    public double predict(double x) {
        return slope * x + intercept;
    }

    /**
     * Returns a string representation of the simple linear regression model.
     * The line is written in terms of the predictor variable {@code x},
     * in the form {@code "<slope> x + <intercept>  (R^2 = <r2>)"}.
     *
     * @return a string representation of the simple linear regression model,
     *         including the best-fit line and the coefficient of determination
     *         <em>R</em><sup>2</sup>
     */
    @Override
    public String toString() {
        return String.format("%.2f x + %.2f  (R^2 = %.3f)", slope(), intercept(), R2());
    }

}

Out[1]:
com.twosigma.beaker.javash.bkr926364f3.LinearRegression

In [2]:
// Sample data: x holds the predictor values and y the matching responses
// (the printed labels below refer to them as height and weight).
double[] x = {
    1.58, 1.8, 1.7, 1.8, 1.76, 1.73, 1.63, 1.65, 1.56, 1.79,
    1.56, 1.51, 1.69, 1.67, 1.74, 1.6, 1.52, 1.57, 1.57, 1.67,
    1.64, 1.5, 1.64, 1.56, 1.56, 1.62, 1.71, 1.83, 1.76
};
double[] y = {
    58, 78, 70, 80, 77, 74, 61, 65, 55, 76,
    54, 53, 69, 67, 72, 58, 53, 55, 57, 66,
    65, 50, 63, 58, 55, 63, 73, 80, 76
};

// Fit the least-squares line and report its coefficients and R^2.
LinearRegression lr = new LinearRegression(x, y);
System.out.println("Slope: " + lr.slope());
System.out.println("Intercept: " + lr.intercept());
System.out.println("R-Squared:" + lr.R2());

// New predictor values ("alturas" is Spanish for "heights") to run through the fitted model.
double[] alturas = {
    1.46, 1.65, 1.51, 1.57, 1.64, 1.72, 1.75, 1.82, 1.73, 1.47,
    1.5, 1.35, 1.77, 1.85, 1.4, 1.41, 1.43, 1.89, 1.9
};
for (int i = 0; i < alturas.length; i++) {
    double altura = alturas[i];
    double predicted = lr.predict(altura);
    System.out.println("Prediction: Height = " + altura + ", weigth = " + predicted);
}

Slope: 95.78639630940887
Intercept: -93.41669348782324
R-Squared:0.9775166656373534
Prediction: Height = 1.46, weigth = 46.431445123913704
Prediction: Height = 1.65, weigth = 64.6308604227014
Prediction: Height = 1.51, weigth = 51.22076493938417
Prediction: Height = 1.57, weigth = 56.9679487179487
Prediction: Height = 1.64, weigth = 63.672996459607305
Prediction: Height = 1.72, weigth = 71.33590816436
Prediction: Height = 1.75, weigth = 74.20950005364227
Prediction: Height = 1.82, weigth = 80.9145477953009
Prediction: Height = 1.73, weigth = 72.2937721274541
Prediction: Height = 1.47, weigth = 47.3893090870078
Prediction: Height = 1.5, weigth = 50.26290097629007
Prediction: Height = 1.35, weigth = 35.894941529878736
Prediction: Height = 1.77, weigth = 76.12522797983047
Prediction: Height = 1.85, weigth = 83.78813968458317
Prediction: Height = 1.4, weigth = 40.68426134534917
Prediction: Height = 1.41, weigth = 41.64212530844327
Prediction: Height = 1.43, weigth = 43.55785323463144
Prediction: Height = 1.89, weigth = 87.61959553695951
Prediction: Height = 1.9, weigth = 88.5774595000536
Out[2]:
null

In [ ]: