View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.regression;
18  
19  
20  import org.apache.commons.math4.legacy.TestUtils;
21  import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
22  import org.apache.commons.math4.legacy.exception.NullArgumentException;
23  import org.apache.commons.math4.legacy.linear.Array2DRowRealMatrix;
24  import org.apache.commons.math4.legacy.linear.DefaultRealMatrixChangingVisitor;
25  import org.apache.commons.math4.legacy.linear.MatrixUtils;
26  import org.apache.commons.math4.legacy.linear.RealMatrix;
27  import org.apache.commons.math4.legacy.linear.RealVector;
28  import org.apache.commons.math4.legacy.stat.StatUtils;
29  import org.junit.Assert;
30  import org.junit.Before;
31  import org.junit.Test;
32  
33  public class OLSMultipleLinearRegressionTest extends MultipleLinearRegressionAbstractTest {
34  
35      private double[] y;
36      private double[][] x;
37  
38      @Before
39      @Override
40      public void setUp(){
41          y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
42          x = new double[6][];
43          x[0] = new double[]{0, 0, 0, 0, 0};
44          x[1] = new double[]{2.0, 0, 0, 0, 0};
45          x[2] = new double[]{0, 3.0, 0, 0, 0};
46          x[3] = new double[]{0, 0, 4.0, 0, 0};
47          x[4] = new double[]{0, 0, 0, 5.0, 0};
48          x[5] = new double[]{0, 0, 0, 0, 6.0};
49          super.setUp();
50      }
51  
52      @Override
53      protected OLSMultipleLinearRegression createRegression() {
54          OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
55          regression.newSampleData(y, x);
56          return regression;
57      }
58  
59      @Override
60      protected int getNumberOfRegressors() {
61          return x[0].length + 1;
62      }
63  
64      @Override
65      protected int getSampleSize() {
66          return y.length;
67      }
68  
69      @Test(expected=MathIllegalArgumentException.class)
70      public void cannotAddSampleDataWithSizeMismatch() {
71          double[] y = new double[]{1.0, 2.0};
72          double[][] x = new double[1][];
73          x[0] = new double[]{1.0, 0};
74          createRegression().newSampleData(y, x);
75      }
76  
77      @Test
78      public void testPerfectFit() {
79          double[] betaHat = regression.estimateRegressionParameters();
80          TestUtils.assertEquals(betaHat,
81                                 new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
82                                 1e-14);
83          double[] residuals = regression.estimateResiduals();
84          TestUtils.assertEquals(residuals, new double[]{0d,0d,0d,0d,0d,0d},
85                                 1e-14);
86          RealMatrix errors =
87              new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
88          final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
89          RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
90          referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
91              @Override
92              public double visit(int row, int column, double value) {
93                  if (row == 0) {
94                      return s[column];
95                  }
96                  double x = s[row] * s[column];
97                  return (row == column) ? 2 * x : x;
98              }
99          });
100        Assert.assertEquals(0.0,
101                      errors.subtract(referenceVariance).getNorm(),
102                      5.0e-16 * referenceVariance.getNorm());
103        Assert.assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
104     }
105 
106 
107     /**
108      * Test Longley dataset against certified values provided by NIST.
109      * Data Source: J. Longley (1967) "An Appraisal of Least Squares
110      * Programs for the Electronic Computer from the Point of View of the User"
111      * Journal of the American Statistical Association, vol. 62. September,
112      * pp. 819-841.
113      *
114      * Certified values (and data) are from NIST:
115      * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat
116      */
117     @Test
118     public void testLongley() {
119         // Y values are first, then independent vars
120         // Each row is one observation
121         double[] design = new double[] {
122             60323,83.0,234289,2356,1590,107608,1947,
123             61122,88.5,259426,2325,1456,108632,1948,
124             60171,88.2,258054,3682,1616,109773,1949,
125             61187,89.5,284599,3351,1650,110929,1950,
126             63221,96.2,328975,2099,3099,112075,1951,
127             63639,98.1,346999,1932,3594,113270,1952,
128             64989,99.0,365385,1870,3547,115094,1953,
129             63761,100.0,363112,3578,3350,116219,1954,
130             66019,101.2,397469,2904,3048,117388,1955,
131             67857,104.6,419180,2822,2857,118734,1956,
132             68169,108.4,442769,2936,2798,120445,1957,
133             66513,110.8,444546,4681,2637,121950,1958,
134             68655,112.6,482704,3813,2552,123366,1959,
135             69564,114.2,502601,3931,2514,125368,1960,
136             69331,115.7,518173,4806,2572,127852,1961,
137             70551,116.9,554894,4007,2827,130081,1962
138         };
139 
140         final int nobs = 16;
141         final int nvars = 6;
142 
143         // Estimate the model
144         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
145         model.newSampleData(design, nobs, nvars);
146 
147         // Check expected beta values from NIST
148         double[] betaHat = model.estimateRegressionParameters();
149         TestUtils.assertEquals(betaHat,
150           new double[]{-3482258.63459582, 15.0618722713733,
151                 -0.358191792925910E-01,-2.02022980381683,
152                 -1.03322686717359,-0.511041056535807E-01,
153                  1829.15146461355}, 2E-8); //
154 
155         // Check expected residuals from R
156         double[] residuals = model.estimateResiduals();
157         TestUtils.assertEquals(residuals, new double[]{
158                 267.340029759711,-94.0139423988359,46.28716775752924,
159                 -410.114621930906,309.7145907602313,-249.3112153297231,
160                 -164.0489563956039,-13.18035686637081,14.30477260005235,
161                  455.394094551857,-17.26892711483297,-39.0550425226967,
162                 -155.5499735953195,-85.6713080421283,341.9315139607727,
163                 -206.7578251937366},
164                       1E-8);
165 
166         // Check standard errors from NIST
167         double[] errors = model.estimateRegressionParametersStandardErrors();
168         TestUtils.assertEquals(new double[] {890420.383607373,
169                        84.9149257747669,
170                        0.334910077722432E-01,
171                        0.488399681651699,
172                        0.214274163161675,
173                        0.226073200069370,
174                        455.478499142212}, errors, 1E-6);
175 
176         // Check regression standard error against R
177         Assert.assertEquals(304.8540735619638, model.estimateRegressionStandardError(), 1E-10);
178 
179         // Check R-Square statistics against R
180         Assert.assertEquals(0.995479004577296, model.calculateRSquared(), 1E-12);
181         Assert.assertEquals(0.992465007628826, model.calculateAdjustedRSquared(), 1E-12);
182 
183         checkVarianceConsistency(model);
184 
185         // Estimate model without intercept
186         model.setNoIntercept(true);
187         model.newSampleData(design, nobs, nvars);
188 
189         // Check expected beta values from R
190         betaHat = model.estimateRegressionParameters();
191         TestUtils.assertEquals(betaHat,
192           new double[]{-52.99357013868291, 0.07107319907358,
193                 -0.42346585566399,-0.57256866841929,
194                 -0.41420358884978, 48.41786562001326}, 1E-11);
195 
196         // Check standard errors from R
197         errors = model.estimateRegressionParametersStandardErrors();
198         TestUtils.assertEquals(new double[] {129.54486693117232, 0.03016640003786,
199                 0.41773654056612, 0.27899087467676, 0.32128496193363,
200                 17.68948737819961}, errors, 1E-11);
201 
202         // Check expected residuals from R
203         residuals = model.estimateResiduals();
204         TestUtils.assertEquals(residuals, new double[]{
205                 279.90274927293092, -130.32465380836874, 90.73228661967445, -401.31252201634948,
206                 -440.46768772620027, -543.54512853774793, 201.32111639536299, 215.90889365977932,
207                 73.09368242049943, 913.21694494481869, 424.82484953610174, -8.56475876776709,
208                 -361.32974610842876, 27.34560497213464, 151.28955976355002, -492.49937355336846},
209                       1E-10);
210 
211         // Check regression standard error against R
212         Assert.assertEquals(475.1655079819517, model.estimateRegressionStandardError(), 1E-10);
213 
214         // Check R-Square statistics against R
215         Assert.assertEquals(0.9999670130706, model.calculateRSquared(), 1E-12);
216         Assert.assertEquals(0.999947220913, model.calculateAdjustedRSquared(), 1E-12);
217     }
218 
219     /**
220      * Test R Swiss fertility dataset against R.
221      * Data Source: R datasets package
222      */
223     @Test
224     public void testSwissFertility() {
225         double[] design = new double[] {
226             80.2,17.0,15,12,9.96,
227             83.1,45.1,6,9,84.84,
228             92.5,39.7,5,5,93.40,
229             85.8,36.5,12,7,33.77,
230             76.9,43.5,17,15,5.16,
231             76.1,35.3,9,7,90.57,
232             83.8,70.2,16,7,92.85,
233             92.4,67.8,14,8,97.16,
234             82.4,53.3,12,7,97.67,
235             82.9,45.2,16,13,91.38,
236             87.1,64.5,14,6,98.61,
237             64.1,62.0,21,12,8.52,
238             66.9,67.5,14,7,2.27,
239             68.9,60.7,19,12,4.43,
240             61.7,69.3,22,5,2.82,
241             68.3,72.6,18,2,24.20,
242             71.7,34.0,17,8,3.30,
243             55.7,19.4,26,28,12.11,
244             54.3,15.2,31,20,2.15,
245             65.1,73.0,19,9,2.84,
246             65.5,59.8,22,10,5.23,
247             65.0,55.1,14,3,4.52,
248             56.6,50.9,22,12,15.14,
249             57.4,54.1,20,6,4.20,
250             72.5,71.2,12,1,2.40,
251             74.2,58.1,14,8,5.23,
252             72.0,63.5,6,3,2.56,
253             60.5,60.8,16,10,7.72,
254             58.3,26.8,25,19,18.46,
255             65.4,49.5,15,8,6.10,
256             75.5,85.9,3,2,99.71,
257             69.3,84.9,7,6,99.68,
258             77.3,89.7,5,2,100.00,
259             70.5,78.2,12,6,98.96,
260             79.4,64.9,7,3,98.22,
261             65.0,75.9,9,9,99.06,
262             92.2,84.6,3,3,99.46,
263             79.3,63.1,13,13,96.83,
264             70.4,38.4,26,12,5.62,
265             65.7,7.7,29,11,13.79,
266             72.7,16.7,22,13,11.22,
267             64.4,17.6,35,32,16.92,
268             77.6,37.6,15,7,4.97,
269             67.6,18.7,25,7,8.65,
270             35.0,1.2,37,53,42.34,
271             44.7,46.6,16,29,50.43,
272             42.8,27.7,22,29,58.33
273         };
274 
275         final int nobs = 47;
276         final int nvars = 4;
277 
278         // Estimate the model
279         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
280         model.newSampleData(design, nobs, nvars);
281 
282         // Check expected beta values from R
283         double[] betaHat = model.estimateRegressionParameters();
284         TestUtils.assertEquals(betaHat,
285                 new double[]{91.05542390271397,
286                 -0.22064551045715,
287                 -0.26058239824328,
288                 -0.96161238456030,
289                  0.12441843147162}, 1E-12);
290 
291         // Check expected residuals from R
292         double[] residuals = model.estimateResiduals();
293         TestUtils.assertEquals(residuals, new double[]{
294                 7.1044267859730512,1.6580347433531366,
295                 4.6944952770029644,8.4548022690166160,13.6547432343186212,
296                -9.3586864458500774,7.5822446330520386,15.5568995563859289,
297                 0.8113090736598980,7.1186762732484308,7.4251378771228724,
298                 2.6761316873234109,0.8351584810309354,7.1769991119615177,
299                -3.8746753206299553,-3.1337779476387251,-0.1412575244091504,
300                 1.1186809170469780,-6.3588097346816594,3.4039270429434074,
301                 2.3374058329820175,-7.9272368576900503,-7.8361010968497959,
302                -11.2597369269357070,0.9445333697827101,6.6544245101380328,
303                -0.9146136301118665,-4.3152449403848570,-4.3536932047009183,
304                -3.8907885169304661,-6.3027643926302188,-7.8308982189289091,
305                -3.1792280015332750,-6.7167298771158226,-4.8469946718041754,
306                -10.6335664353633685,11.1031134362036958,6.0084032641811733,
307                 5.4326230830188482,-7.2375578629692230,2.1671550814448222,
308                 15.0147574652763112,4.8625103516321015,-7.1597256413907706,
309                 -0.4515205619767598,-10.2916870903837587,-15.7812984571900063},
310                 1E-12);
311 
312         // Check standard errors from R
313         double[] errors = model.estimateRegressionParametersStandardErrors();
314         TestUtils.assertEquals(new double[] {6.94881329475087,
315                 0.07360008972340,
316                 0.27410957467466,
317                 0.19454551679325,
318                 0.03726654773803}, errors, 1E-10);
319 
320         // Check regression standard error against R
321         Assert.assertEquals(7.73642194433223, model.estimateRegressionStandardError(), 1E-12);
322 
323         // Check R-Square statistics against R
324         Assert.assertEquals(0.649789742860228, model.calculateRSquared(), 1E-12);
325         Assert.assertEquals(0.6164363850373927, model.calculateAdjustedRSquared(), 1E-12);
326 
327         checkVarianceConsistency(model);
328 
329         // Estimate the model with no intercept
330         model = new OLSMultipleLinearRegression();
331         model.setNoIntercept(true);
332         model.newSampleData(design, nobs, nvars);
333 
334         // Check expected beta values from R
335         betaHat = model.estimateRegressionParameters();
336         TestUtils.assertEquals(betaHat,
337                 new double[]{0.52191832900513,
338                   2.36588087917963,
339                   -0.94770353802795,
340                   0.30851985863609}, 1E-12);
341 
342         // Check expected residuals from R
343         residuals = model.estimateResiduals();
344         TestUtils.assertEquals(residuals, new double[]{
345                 44.138759883538249, 27.720705122356215, 35.873200836126799,
346                 34.574619581211977, 26.600168342080213, 15.074636243026923, -12.704904871199814,
347                 1.497443824078134, 2.691972687079431, 5.582798774291231, -4.422986561283165,
348                 -9.198581600334345, 4.481765170730647, 2.273520207553216, -22.649827853221336,
349                 -17.747900013943308, 20.298314638496436, 6.861405135329779, -8.684712790954924,
350                 -10.298639278062371, -9.896618896845819, 4.568568616351242, -15.313570491727944,
351                 -13.762961360873966, 7.156100301980509, 16.722282219843990, 26.716200609071898,
352                 -1.991466398777079, -2.523342564719335, 9.776486693095093, -5.297535127628603,
353                 -16.639070567471094, -10.302057295211819, -23.549487860816846, 1.506624392156384,
354                 -17.939174438345930, 13.105792202765040, -1.943329906928462, -1.516005841666695,
355                 -0.759066561832886, 20.793137744128977, -2.485236153005426, 27.588238710486976,
356                 2.658333257106881, -15.998337823623046, -5.550742066720694, -14.219077806826615},
357                 1E-12);
358 
359         // Check standard errors from R
360         errors = model.estimateRegressionParametersStandardErrors();
361         TestUtils.assertEquals(new double[] {0.10470063765677, 0.41684100584290,
362                 0.43370143099691, 0.07694953606522}, errors, 1E-10);
363 
364         // Check regression standard error against R
365         Assert.assertEquals(17.24710630547, model.estimateRegressionStandardError(), 1E-10);
366 
367         // Check R-Square statistics against R
368         Assert.assertEquals(0.946350722085, model.calculateRSquared(), 1E-12);
369         Assert.assertEquals(0.9413600915813, model.calculateAdjustedRSquared(), 1E-12);
370     }
371 
372     /**
373      * Test hat matrix computation
374      *
375      */
376     @Test
377     public void testHat() {
378 
379         /*
380          * This example is from "The Hat Matrix in Regression and ANOVA",
381          * David C. Hoaglin and Roy E. Welsch,
382          * The American Statistician, Vol. 32, No. 1 (Feb., 1978), pp. 17-22.
383          *
384          */
385         double[] design = new double[] {
386                 11.14, .499, 11.1,
387                 12.74, .558, 8.9,
388                 13.13, .604, 8.8,
389                 11.51, .441, 8.9,
390                 12.38, .550, 8.8,
391                 12.60, .528, 9.9,
392                 11.13, .418, 10.7,
393                 11.7, .480, 10.5,
394                 11.02, .406, 10.5,
395                 11.41, .467, 10.7
396         };
397 
398         int nobs = 10;
399         int nvars = 2;
400 
401         // Estimate the model
402         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
403         model.newSampleData(design, nobs, nvars);
404 
405         RealMatrix hat = model.calculateHat();
406 
407         // Reference data is upper half of symmetric hat matrix
408         double[] referenceData = new double[] {
409                 .418, -.002,  .079, -.274, -.046,  .181,  .128,  .222,  .050,  .242,
410                        .242,  .292,  .136,  .243,  .128, -.041,  .033, -.035,  .004,
411                               .417, -.019,  .273,  .187, -.126,  .044, -.153,  .004,
412                                      .604,  .197, -.038,  .168, -.022,  .275, -.028,
413                                             .252,  .111, -.030,  .019, -.010, -.010,
414                                                    .148,  .042,  .117,  .012,  .111,
415                                                           .262,  .145,  .277,  .174,
416                                                                  .154,  .120,  .168,
417                                                                         .315,  .148,
418                                                                                .187
419         };
420 
421         // Check against reference data and verify symmetry
422         int k = 0;
423         for (int i = 0; i < 10; i++) {
424             for (int j = i; j < 10; j++) {
425                 Assert.assertEquals(referenceData[k], hat.getEntry(i, j), 10e-3);
426                 Assert.assertEquals(hat.getEntry(i, j), hat.getEntry(j, i), 10e-12);
427                 k++;
428             }
429         }
430 
431         /*
432          * Verify that residuals computed using the hat matrix are close to
433          * what we get from direct computation, i.e. r = (I - H) y
434          */
435         double[] residuals = model.estimateResiduals();
436         RealMatrix I = MatrixUtils.createRealIdentityMatrix(10);
437         double[] hatResiduals = I.subtract(hat).operate(model.getY()).toArray();
438         TestUtils.assertEquals(residuals, hatResiduals, 10e-12);
439     }
440 
441     /**
442      * test calculateYVariance
443      */
444     @Test
445     public void testYVariance() {
446 
447         // assumes: y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
448 
449         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
450         model.newSampleData(y, x);
451         TestUtils.assertEquals(model.calculateYVariance(), 3.5, 0);
452     }
453 
454     /**
455      * Verifies that calculateYVariance and calculateResidualVariance return consistent
456      * values with direct variance computation from Y, residuals, respectively.
457      */
458     protected void checkVarianceConsistency(OLSMultipleLinearRegression model) {
459         // Check Y variance consistency
460         TestUtils.assertEquals(StatUtils.variance(model.getY().toArray()), model.calculateYVariance(), 0);
461 
462         // Check residual variance consistency
463         double[] residuals = model.calculateResiduals().toArray();
464         RealMatrix X = model.getX();
465         TestUtils.assertEquals(
466                 StatUtils.variance(model.calculateResiduals().toArray()) * (residuals.length - 1),
467                 model.calculateErrorVariance() * (X.getRowDimension() - X.getColumnDimension()), 1E-20);
468     }
469 
470     /**
471      * Verifies that setting X and Y separately has the same effect as newSample(X,Y).
472      */
473     @Test
474     public void testNewSample2() {
475         double[] y = new double[] {1, 2, 3, 4};
476         double[][] x = new double[][] {
477           {19, 22, 33},
478           {20, 30, 40},
479           {25, 35, 45},
480           {27, 37, 47}
481         };
482         OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
483         regression.newSampleData(y, x);
484         RealMatrix combinedX = regression.getX().copy();
485         RealVector combinedY = regression.getY().copy();
486         regression.newXSampleData(x);
487         regression.newYSampleData(y);
488         Assert.assertEquals(combinedX, regression.getX());
489         Assert.assertEquals(combinedY, regression.getY());
490 
491         // No intercept
492         regression.setNoIntercept(true);
493         regression.newSampleData(y, x);
494         combinedX = regression.getX().copy();
495         combinedY = regression.getY().copy();
496         regression.newXSampleData(x);
497         regression.newYSampleData(y);
498         Assert.assertEquals(combinedX, regression.getX());
499         Assert.assertEquals(combinedY, regression.getY());
500     }
501 
502     @Test(expected=NullArgumentException.class)
503     public void testNewSampleDataYNull() {
504         createRegression().newSampleData(null, new double[][] {});
505     }
506 
507     @Test(expected=NullArgumentException.class)
508     public void testNewSampleDataXNull() {
509         createRegression().newSampleData(new double[] {}, null);
510     }
511 
512      /*
513      * This is a test based on the Wampler1 data set
514      * http://www.itl.nist.gov/div898/strd/lls/data/Wampler1.shtml
515      */
516     @Test
517     public void testWampler1() {
518         double[] data = new double[]{
519             1, 0,
520             6, 1,
521             63, 2,
522             364, 3,
523             1365, 4,
524             3906, 5,
525             9331, 6,
526             19608, 7,
527             37449, 8,
528             66430, 9,
529             111111, 10,
530             177156, 11,
531             271453, 12,
532             402234, 13,
533             579195, 14,
534             813616, 15,
535             1118481, 16,
536             1508598, 17,
537             2000719, 18,
538             2613660, 19,
539             3368421, 20};
540         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
541 
542 
543         final int nvars = 5;
544         final int nobs = 21;
545         double[] tmp = new double[(nvars + 1) * nobs];
546         int off = 0;
547         int off2 = 0;
548         for (int i = 0; i < nobs; i++) {
549             tmp[off2] = data[off];
550             tmp[off2 + 1] = data[off + 1];
551             tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
552             tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
553             tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
554             tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
555             off2 += nvars + 1;
556             off += 2;
557         }
558         model.newSampleData(tmp, nobs, nvars);
559         double[] betaHat = model.estimateRegressionParameters();
560         TestUtils.assertEquals(betaHat,
561                 new double[]{1.0,
562                     1.0, 1.0,
563                     1.0, 1.0,
564                     1.0}, 1E-8);
565 
566         double[] se = model.estimateRegressionParametersStandardErrors();
567         TestUtils.assertEquals(se,
568                 new double[]{0.0,
569                     0.0, 0.0,
570                     0.0, 0.0,
571                     0.0}, 1E-8);
572 
573         TestUtils.assertEquals(1.0, model.calculateRSquared(), 1.0e-10);
574         TestUtils.assertEquals(0, model.estimateErrorVariance(), 1.0e-7);
575         TestUtils.assertEquals(0.00, model.calculateResidualSumOfSquares(), 1.0e-6);
576 
577         return;
578     }
579 
580     /*
581      * This is a test based on the Wampler2 data set
582      * http://www.itl.nist.gov/div898/strd/lls/data/Wampler2.shtml
583      */
584     @Test
585     public void testWampler2() {
586         double[] data = new double[]{
587             1.00000, 0,
588             1.11111, 1,
589             1.24992, 2,
590             1.42753, 3,
591             1.65984, 4,
592             1.96875, 5,
593             2.38336, 6,
594             2.94117, 7,
595             3.68928, 8,
596             4.68559, 9,
597             6.00000, 10,
598             7.71561, 11,
599             9.92992, 12,
600             12.75603, 13,
601             16.32384, 14,
602             20.78125, 15,
603             26.29536, 16,
604             33.05367, 17,
605             41.26528, 18,
606             51.16209, 19,
607             63.00000, 20};
608         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
609 
610 
611         final int nvars = 5;
612         final int nobs = 21;
613         double[] tmp = new double[(nvars + 1) * nobs];
614         int off = 0;
615         int off2 = 0;
616         for (int i = 0; i < nobs; i++) {
617             tmp[off2] = data[off];
618             tmp[off2 + 1] = data[off + 1];
619             tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
620             tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
621             tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
622             tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
623             off2 += nvars + 1;
624             off += 2;
625         }
626         model.newSampleData(tmp, nobs, nvars);
627         double[] betaHat = model.estimateRegressionParameters();
628         TestUtils.assertEquals(betaHat,
629                 new double[]{
630                     1.0,
631                     1.0e-1,
632                     1.0e-2,
633                     1.0e-3, 1.0e-4,
634                     1.0e-5}, 1E-8);
635 
636         double[] se = model.estimateRegressionParametersStandardErrors();
637         TestUtils.assertEquals(se,
638                 new double[]{0.0,
639                     0.0, 0.0,
640                     0.0, 0.0,
641                     0.0}, 1E-8);
642         TestUtils.assertEquals(1.0, model.calculateRSquared(), 1.0e-10);
643         TestUtils.assertEquals(0, model.estimateErrorVariance(), 1.0e-7);
644         TestUtils.assertEquals(0.00, model.calculateResidualSumOfSquares(), 1.0e-6);
645         return;
646     }
647 
648     /*
649      * This is a test based on the Wampler3 data set
650      * http://www.itl.nist.gov/div898/strd/lls/data/Wampler3.shtml
651      */
652     @Test
653     public void testWampler3() {
654         double[] data = new double[]{
655             760, 0,
656             -2042, 1,
657             2111, 2,
658             -1684, 3,
659             3888, 4,
660             1858, 5,
661             11379, 6,
662             17560, 7,
663             39287, 8,
664             64382, 9,
665             113159, 10,
666             175108, 11,
667             273291, 12,
668             400186, 13,
669             581243, 14,
670             811568, 15,
671             1121004, 16,
672             1506550, 17,
673             2002767, 18,
674             2611612, 19,
675             3369180, 20};
676 
677         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
678         final int nvars = 5;
679         final int nobs = 21;
680         double[] tmp = new double[(nvars + 1) * nobs];
681         int off = 0;
682         int off2 = 0;
683         for (int i = 0; i < nobs; i++) {
684             tmp[off2] = data[off];
685             tmp[off2 + 1] = data[off + 1];
686             tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
687             tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
688             tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
689             tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
690             off2 += nvars + 1;
691             off += 2;
692         }
693         model.newSampleData(tmp, nobs, nvars);
694         double[] betaHat = model.estimateRegressionParameters();
695         TestUtils.assertEquals(betaHat,
696                 new double[]{
697                     1.0,
698                     1.0,
699                     1.0,
700                     1.0,
701                     1.0,
702                     1.0}, 1E-8);
703 
704         double[] se = model.estimateRegressionParametersStandardErrors();
705         TestUtils.assertEquals(se,
706                 new double[]{2152.32624678170,
707                     2363.55173469681, 779.343524331583,
708                     101.475507550350, 5.64566512170752,
709                     0.112324854679312}, 1E-8); //
710 
711         TestUtils.assertEquals(.999995559025820, model.calculateRSquared(), 1.0e-10);
712         TestUtils.assertEquals(5570284.53333333, model.estimateErrorVariance(), 1.0e-6);
713         TestUtils.assertEquals(83554268.0000000, model.calculateResidualSumOfSquares(), 1.0e-5);
714         return;
715     }
716 
717     /*
718      * This is a test based on the Wampler4 data set
719      * http://www.itl.nist.gov/div898/strd/lls/data/Wampler4.shtml
720      */
721     @Test
722     public void testWampler4() {
723         double[] data = new double[]{
724             75901, 0,
725             -204794, 1,
726             204863, 2,
727             -204436, 3,
728             253665, 4,
729             -200894, 5,
730             214131, 6,
731             -185192, 7,
732             221249, 8,
733             -138370, 9,
734             315911, 10,
735             -27644, 11,
736             455253, 12,
737             197434, 13,
738             783995, 14,
739             608816, 15,
740             1370781, 16,
741             1303798, 17,
742             2205519, 18,
743             2408860, 19,
744             3444321, 20};
745 
746         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
747         final int nvars = 5;
748         final int nobs = 21;
749         double[] tmp = new double[(nvars + 1) * nobs];
750         int off = 0;
751         int off2 = 0;
752         for (int i = 0; i < nobs; i++) {
753             tmp[off2] = data[off];
754             tmp[off2 + 1] = data[off + 1];
755             tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
756             tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
757             tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
758             tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
759             off2 += nvars + 1;
760             off += 2;
761         }
762         model.newSampleData(tmp, nobs, nvars);
763         double[] betaHat = model.estimateRegressionParameters();
764         TestUtils.assertEquals(betaHat,
765                 new double[]{
766                     1.0,
767                     1.0,
768                     1.0,
769                     1.0,
770                     1.0,
771                     1.0}, 1E-6);
772 
773         double[] se = model.estimateRegressionParametersStandardErrors();
774         TestUtils.assertEquals(se,
775                 new double[]{215232.624678170,
776                     236355.173469681, 77934.3524331583,
777                     10147.5507550350, 564.566512170752,
778                     11.2324854679312}, 1E-8);
779 
780         TestUtils.assertEquals(.957478440825662, model.calculateRSquared(), 1.0e-10);
781         TestUtils.assertEquals(55702845333.3333, model.estimateErrorVariance(), 1.0e-4);
782         TestUtils.assertEquals(835542680000.000, model.calculateResidualSumOfSquares(), 1.0e-3);
783         return;
784     }
785 
786     /**
787      * Anything requiring beta calculation should advertise SME.
788      */
789     @Test(expected=org.apache.commons.math4.legacy.linear.SingularMatrixException.class)
790     public void testSingularCalculateBeta() {
791         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
792         model.newSampleData(new double[] {1,  2,  3, 1, 2, 3, 1, 2, 3}, 3, 2);
793         model.calculateBeta();
794     }
795 
796     @Test
797     public void testNoSSTOCalculateRsquare() {
798         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
799         model.newSampleData(new double[] {1,  2,  3, 1, 7, 8, 1, 10, 12}, 3, 2);
800         Assert.assertTrue(Double.isNaN(model.calculateRSquared()));
801     }
802 
803     @Test(expected=NullPointerException.class)
804     public void testNoDataNPECalculateBeta() {
805         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
806         model.calculateBeta();
807     }
808 
809     @Test(expected=NullPointerException.class)
810     public void testNoDataNPECalculateHat() {
811         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
812         model.calculateHat();
813     }
814 
815     @Test(expected=NullPointerException.class)
816     public void testNoDataNPESSTO() {
817         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
818         model.calculateTotalSumOfSquares();
819     }
820 }