View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.regression;
18  
19  import java.util.Random;
20  
21  import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
22  import org.apache.commons.math4.legacy.exception.OutOfRangeException;
23  import org.apache.commons.rng.UniformRandomProvider;
24  import org.apache.commons.rng.simple.RandomSource;
25  import org.apache.commons.math4.core.jdkmath.JdkMath;
26  import org.junit.Assert;
27  import org.junit.Test;
28  
29  
/**
 * Test cases for the {@link SimpleRegression} class.
 *
 */
34  
35  public final class SimpleRegressionTest {
36  
37      /*
38       * NIST "Norris" reference data set from
39       * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
40       * Strangely, order is {y,x}
41       */
42      private double[][] data = { { 0.1, 0.2 }, {338.8, 337.4 }, {118.1, 118.2 },
43              {888.0, 884.6 }, {9.2, 10.1 }, {228.1, 226.5 }, {668.5, 666.3 }, {998.5, 996.3 },
44              {449.1, 448.6 }, {778.9, 777.0 }, {559.2, 558.2 }, {0.3, 0.4 }, {0.1, 0.6 }, {778.1, 775.5 },
45              {668.8, 666.9 }, {339.3, 338.0 }, {448.9, 447.5 }, {10.8, 11.6 }, {557.7, 556.0 },
46              {228.3, 228.1 }, {998.0, 995.8 }, {888.8, 887.6 }, {119.6, 120.2 }, {0.3, 0.3 },
47              {0.6, 0.3 }, {557.6, 556.8 }, {339.3, 339.1 }, {888.0, 887.2 }, {998.5, 999.0 },
48              {778.9, 779.0 }, {10.2, 11.1 }, {117.6, 118.3 }, {228.9, 229.2 }, {668.4, 669.1 },
49              {449.2, 448.9 }, {0.2, 0.5 }
50      };
51  
52      /*
53       * Correlation example from
54       * http://www.xycoon.com/correlation.htm
55       */
56      private double[][] corrData = { { 101.0, 99.2 }, {100.1, 99.0 }, {100.0, 100.0 },
57              {90.6, 111.6 }, {86.5, 122.2 }, {89.7, 117.6 }, {90.6, 121.1 }, {82.8, 136.0 },
58              {70.1, 154.2 }, {65.4, 153.6 }, {61.3, 158.5 }, {62.5, 140.6 }, {63.6, 136.2 },
59              {52.6, 168.0 }, {59.7, 154.3 }, {59.5, 149.0 }, {61.3, 165.5 }
60      };
61  
62      /*
63       * From Moore and Mcabe, "Introduction to the Practice of Statistics"
64       * Example 10.3
65       */
66      private double[][] infData = { { 15.6, 5.2 }, {26.8, 6.1 }, {37.8, 8.7 }, {36.4, 8.5 },
67              {35.5, 8.8 }, {18.6, 4.9 }, {15.3, 4.5 }, {7.9, 2.5 }, {0.0, 1.1 }
68      };
69  
70      /*
71       * Points to remove in the remove tests
72       */
73      private double[][] removeSingle = {infData[1]};
74      private double[][] removeMultiple = { infData[1], infData[2] };
75      private double removeX = infData[0][0];
76      private double removeY = infData[0][1];
77  
78  
79      /*
80       * Data with bad linear fit
81       */
82      private double[][] infData2 = { { 1, 1 }, {2, 0 }, {3, 5 }, {4, 2 },
83              {5, -1 }, {6, 12 }
84      };
85  
86  
87      /*
88       * Data from NIST NOINT1
89       */
90      private double[][] noint1 = {
91          {130.0,60.0},
92          {131.0,61.0},
93          {132.0,62.0},
94          {133.0,63.0},
95          {134.0,64.0},
96          {135.0,65.0},
97          {136.0,66.0},
98          {137.0,67.0},
99          {138.0,68.0},
100         {139.0,69.0},
101         {140.0,70.0}
102     };
103 
104     /*
105      * Data from NIST NOINT2
106      *
107      */
108     private double[][] noint2 = {
109         {3.0,4},
110         {4,5},
111         {4,6}
112     };
113 
114 
115     /**
116      * Test that the SimpleRegression objects generated from combining two
117      * SimpleRegression objects created from subsets of data are identical to
118      * SimpleRegression objects created from the combined data.
119      */
120     @Test
121     public void testAppend() {
122         check(false);
123         check(true);
124     }
125 
126     /**
127      * Checks that adding data to a single model gives the same result
128      * as adding "parts" of the dataset to smaller models and using append
129      * to aggregate the smaller models.
130      *
131      * @param includeIntercept
132      */
133     private void check(boolean includeIntercept) {
134         final int sets = 2;
135         final UniformRandomProvider rand = RandomSource.ISAAC.create(10L);// Seed can be changed
136         final SimpleRegression whole = new SimpleRegression(includeIntercept);// regression of the whole set
137         final SimpleRegression parts = new SimpleRegression(includeIntercept);// regression with parts.
138 
139         for (int s = 0; s < sets; s++) {// loop through each subset of data.
140             final double coef = rand.nextDouble();
141             final SimpleRegression sub = new SimpleRegression(includeIntercept);// sub regression
142             for (int i = 0; i < 5; i++) { // loop through individual samlpes.
143                 final double x = rand.nextDouble();
144                 final double y = x * coef + rand.nextDouble();// some noise
145                 sub.addData(x, y);
146                 whole.addData(x, y);
147             }
148             parts.append(sub);
149             Assert.assertTrue(equals(parts, whole, 1E-6));
150         }
151     }
152 
153     /**
154      * Returns true iff the statistics reported by model1 are all within tol of
155      * those reported by model2.
156      *
157      * @param model1 first model
158      * @param model2 second model
159      * @param tol tolerance
160      * @return true if the two models report the same regression stats
161      */
162     private boolean equals(SimpleRegression model1, SimpleRegression model2, double tol) {
163         if (model1.getN() != model2.getN()) {
164             return false;
165         }
166         if (JdkMath.abs(model1.getIntercept() - model2.getIntercept()) > tol) {
167             return false;
168         }
169         if (JdkMath.abs(model1.getInterceptStdErr() - model2.getInterceptStdErr()) > tol) {
170             return false;
171         }
172         if (JdkMath.abs(model1.getMeanSquareError() - model2.getMeanSquareError()) > tol) {
173             return false;
174         }
175         if (JdkMath.abs(model1.getR() - model2.getR()) > tol) {
176             return false;
177         }
178         if (JdkMath.abs(model1.getRegressionSumSquares() - model2.getRegressionSumSquares()) > tol) {
179             return false;
180         }
181         if (JdkMath.abs(model1.getRSquare() - model2.getRSquare()) > tol) {
182             return false;
183         }
184         if (JdkMath.abs(model1.getSignificance() - model2.getSignificance()) > tol) {
185             return false;
186         }
187         if (JdkMath.abs(model1.getSlope() - model2.getSlope()) > tol) {
188             return false;
189         }
190         if (JdkMath.abs(model1.getSlopeConfidenceInterval() - model2.getSlopeConfidenceInterval()) > tol) {
191             return false;
192         }
193         if (JdkMath.abs(model1.getSlopeStdErr() - model2.getSlopeStdErr()) > tol) {
194             return false;
195         }
196         if (JdkMath.abs(model1.getSumOfCrossProducts() - model2.getSumOfCrossProducts()) > tol) {
197             return false;
198         }
199         if (JdkMath.abs(model1.getSumSquaredErrors() - model2.getSumSquaredErrors()) > tol) {
200             return false;
201         }
202         if (JdkMath.abs(model1.getTotalSumSquares() - model2.getTotalSumSquares()) > tol) {
203             return false;
204         }
205         if (JdkMath.abs(model1.getXSumSquares() - model2.getXSumSquares()) > tol) {
206             return false;
207         }
208         return true;
209     }
210 
211     @Test
212     public void testRegressIfaceMethod(){
213         final SimpleRegression regression = new SimpleRegression(true);
214         final UpdatingMultipleLinearRegression iface = regression;
215         final SimpleRegression regressionNoint = new SimpleRegression( false );
216         final SimpleRegression regressionIntOnly= new SimpleRegression( false );
217         for (int i = 0; i < data.length; i++) {
218             iface.addObservation( new double[]{data[i][1]}, data[i][0]);
219             regressionNoint.addData(data[i][1], data[i][0]);
220             regressionIntOnly.addData(1.0, data[i][0]);
221         }
222 
223         //should not be null
224         final RegressionResults fullReg = iface.regress( );
225         Assert.assertNotNull(fullReg);
226         Assert.assertEquals("intercept", regression.getIntercept(), fullReg.getParameterEstimate(0), 1.0e-16);
227         Assert.assertEquals("intercept std err",regression.getInterceptStdErr(), fullReg.getStdErrorOfEstimate(0),1.0E-16);
228         Assert.assertEquals("slope", regression.getSlope(), fullReg.getParameterEstimate(1), 1.0e-16);
229         Assert.assertEquals("slope std err",regression.getSlopeStdErr(), fullReg.getStdErrorOfEstimate(1),1.0E-16);
230         Assert.assertEquals("number of observations",regression.getN(), fullReg.getN());
231         Assert.assertEquals("r-square",regression.getRSquare(), fullReg.getRSquared(), 1.0E-16);
232         Assert.assertEquals("SSR", regression.getRegressionSumSquares(), fullReg.getRegressionSumSquares() ,1.0E-16);
233         Assert.assertEquals("MSE", regression.getMeanSquareError(), fullReg.getMeanSquareError() ,1.0E-16);
234         Assert.assertEquals("SSE", regression.getSumSquaredErrors(), fullReg.getErrorSumSquares() ,1.0E-16);
235 
236 
237         final RegressionResults noInt   = iface.regress( new int[]{1} );
238         Assert.assertNotNull(noInt);
239         Assert.assertEquals("slope", regressionNoint.getSlope(), noInt.getParameterEstimate(0), 1.0e-12);
240         Assert.assertEquals("slope std err",regressionNoint.getSlopeStdErr(), noInt.getStdErrorOfEstimate(0),1.0E-16);
241         Assert.assertEquals("number of observations",regressionNoint.getN(), noInt.getN());
242         Assert.assertEquals("r-square",regressionNoint.getRSquare(), noInt.getRSquared(), 1.0E-16);
243         Assert.assertEquals("SSR", regressionNoint.getRegressionSumSquares(), noInt.getRegressionSumSquares() ,1.0E-8);
244         Assert.assertEquals("MSE", regressionNoint.getMeanSquareError(), noInt.getMeanSquareError() ,1.0E-16);
245         Assert.assertEquals("SSE", regressionNoint.getSumSquaredErrors(), noInt.getErrorSumSquares() ,1.0E-16);
246 
247         final RegressionResults onlyInt = iface.regress( new int[]{0} );
248         Assert.assertNotNull(onlyInt);
249         Assert.assertEquals("slope", regressionIntOnly.getSlope(), onlyInt.getParameterEstimate(0), 1.0e-12);
250         Assert.assertEquals("slope std err",regressionIntOnly.getSlopeStdErr(), onlyInt.getStdErrorOfEstimate(0),1.0E-12);
251         Assert.assertEquals("number of observations",regressionIntOnly.getN(), onlyInt.getN());
252         Assert.assertEquals("r-square",regressionIntOnly.getRSquare(), onlyInt.getRSquared(), 1.0E-14);
253         Assert.assertEquals("SSE", regressionIntOnly.getSumSquaredErrors(), onlyInt.getErrorSumSquares() ,1.0E-8);
254         Assert.assertEquals("SSR", regressionIntOnly.getRegressionSumSquares(), onlyInt.getRegressionSumSquares() ,1.0E-8);
255         Assert.assertEquals("MSE", regressionIntOnly.getMeanSquareError(), onlyInt.getMeanSquareError() ,1.0E-8);
256     }
257 
258     /**
259      * Verify that regress generates exceptions as advertised for bad model specifications.
260      */
261     @Test
262     public void testRegressExceptions() {
263         // No intercept
264         final SimpleRegression noIntRegression = new SimpleRegression(false);
265         noIntRegression.addData(noint2[0][1], noint2[0][0]);
266         noIntRegression.addData(noint2[1][1], noint2[1][0]);
267         noIntRegression.addData(noint2[2][1], noint2[2][0]);
268         try { // null array
269             noIntRegression.regress(null);
270             Assert.fail("Expecting MathIllegalArgumentException for null array");
271         } catch (MathIllegalArgumentException ex) {
272             // Expected
273         }
274         try { // empty array
275             noIntRegression.regress(new int[] {});
276             Assert.fail("Expecting MathIllegalArgumentException for empty array");
277         } catch (MathIllegalArgumentException ex) {
278             // Expected
279         }
280         try { // more than 1 regressor
281             noIntRegression.regress(new int[] {0, 1});
282             Assert.fail("Expecting ModelSpecificationException - too many regressors");
283         } catch (ModelSpecificationException ex) {
284             // Expected
285         }
286         try { // invalid regressor
287             noIntRegression.regress(new int[] {1});
288             Assert.fail("Expecting OutOfRangeException - invalid regression");
289         } catch (OutOfRangeException ex) {
290             // Expected
291         }
292 
293         // With intercept
294         final SimpleRegression regression = new SimpleRegression(true);
295         regression.addData(noint2[0][1], noint2[0][0]);
296         regression.addData(noint2[1][1], noint2[1][0]);
297         regression.addData(noint2[2][1], noint2[2][0]);
298         try { // null array
299             regression.regress(null);
300             Assert.fail("Expecting MathIllegalArgumentException for null array");
301         } catch (MathIllegalArgumentException ex) {
302             // Expected
303         }
304         try { // empty array
305             regression.regress(new int[] {});
306             Assert.fail("Expecting MathIllegalArgumentException for empty array");
307         } catch (MathIllegalArgumentException ex) {
308             // Expected
309         }
310         try { // more than 2 regressors
311             regression.regress(new int[] {0, 1, 2});
312             Assert.fail("Expecting ModelSpecificationException - too many regressors");
313         } catch (ModelSpecificationException ex) {
314             // Expected
315         }
316         try { // wrong order
317             regression.regress(new int[] {1,0});
318             Assert.fail("Expecting ModelSpecificationException - invalid regression");
319         } catch (ModelSpecificationException ex) {
320             // Expected
321         }
322         try { // out of range
323             regression.regress(new int[] {3,4});
324             Assert.fail("Expecting OutOfRangeException");
325         } catch (OutOfRangeException ex) {
326             // Expected
327         }
328         try { // out of range
329             regression.regress(new int[] {0,2});
330             Assert.fail("Expecting OutOfRangeException");
331         } catch (OutOfRangeException ex) {
332             // Expected
333         }
334         try { // out of range
335             regression.regress(new int[] {2});
336             Assert.fail("Expecting OutOfRangeException");
337         } catch (OutOfRangeException ex) {
338             // Expected
339         }
340     }
341 
342     @Test
343     public void testNoInterceot_noint2(){
344          SimpleRegression regression = new SimpleRegression(false);
345          regression.addData(noint2[0][1], noint2[0][0]);
346          regression.addData(noint2[1][1], noint2[1][0]);
347          regression.addData(noint2[2][1], noint2[2][0]);
348          Assert.assertEquals("intercept", 0, regression.getIntercept(), 0);
349          Assert.assertEquals("slope", 0.727272727272727,
350                  regression.getSlope(), 10E-12);
351          Assert.assertEquals("slope std err", 0.420827318078432E-01,
352                 regression.getSlopeStdErr(),10E-12);
353         Assert.assertEquals("number of observations", 3, regression.getN());
354         Assert.assertEquals("r-square", 0.993348115299335,
355             regression.getRSquare(), 10E-12);
356         Assert.assertEquals("SSR", 40.7272727272727,
357             regression.getRegressionSumSquares(), 10E-9);
358         Assert.assertEquals("MSE", 0.136363636363636,
359             regression.getMeanSquareError(), 10E-10);
360         Assert.assertEquals("SSE", 0.272727272727273,
361             regression.getSumSquaredErrors(),10E-9);
362     }
363 
364     @Test
365     public void testNoIntercept_noint1(){
366         SimpleRegression regression = new SimpleRegression(false);
367         for (int i = 0; i < noint1.length; i++) {
368             regression.addData(noint1[i][1], noint1[i][0]);
369         }
370         Assert.assertEquals("intercept", 0, regression.getIntercept(), 0);
371         Assert.assertEquals("slope", 2.07438016528926, regression.getSlope(), 10E-12);
372         Assert.assertEquals("slope std err", 0.165289256198347E-01,
373                 regression.getSlopeStdErr(),10E-12);
374         Assert.assertEquals("number of observations", 11, regression.getN());
375         Assert.assertEquals("r-square", 0.999365492298663,
376             regression.getRSquare(), 10E-12);
377         Assert.assertEquals("SSR", 200457.727272727,
378             regression.getRegressionSumSquares(), 10E-9);
379         Assert.assertEquals("MSE", 12.7272727272727,
380             regression.getMeanSquareError(), 10E-10);
381         Assert.assertEquals("SSE", 127.272727272727,
382             regression.getSumSquaredErrors(),10E-9);
383     }
384 
385     @Test
386     public void testNorris() {
387         SimpleRegression regression = new SimpleRegression();
388         for (int i = 0; i < data.length; i++) {
389             regression.addData(data[i][1], data[i][0]);
390         }
391         // Tests against certified values from
392         // http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
393         Assert.assertEquals("slope", 1.00211681802045, regression.getSlope(), 10E-12);
394         Assert.assertEquals("slope std err", 0.429796848199937E-03,
395                 regression.getSlopeStdErr(),10E-12);
396         Assert.assertEquals("number of observations", 36, regression.getN());
397         Assert.assertEquals( "intercept", -0.262323073774029,
398             regression.getIntercept(),10E-12);
399         Assert.assertEquals("std err intercept", 0.232818234301152,
400             regression.getInterceptStdErr(),10E-12);
401         Assert.assertEquals("r-square", 0.999993745883712,
402             regression.getRSquare(), 10E-12);
403         Assert.assertEquals("SSR", 4255954.13232369,
404             regression.getRegressionSumSquares(), 10E-9);
405         Assert.assertEquals("MSE", 0.782864662630069,
406             regression.getMeanSquareError(), 10E-10);
407         Assert.assertEquals("SSE", 26.6173985294224,
408             regression.getSumSquaredErrors(),10E-9);
409         // ------------  End certified data tests
410 
411         Assert.assertEquals( "predict(0)",  -0.262323073774029,
412             regression.predict(0), 10E-12);
413         Assert.assertEquals("predict(1)", 1.00211681802045 - 0.262323073774029,
414             regression.predict(1), 10E-12);
415     }
416 
417     @Test
418     public void testCorr() {
419         SimpleRegression regression = new SimpleRegression();
420         regression.addData(corrData);
421         Assert.assertEquals("number of observations", 17, regression.getN());
422         Assert.assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
423         Assert.assertEquals("r", -0.94663767742, regression.getR(), 1E-10);
424     }
425 
426     @Test
427     public void testNaNs() {
428         SimpleRegression regression = new SimpleRegression();
429         Assert.assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
430         Assert.assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
431         Assert.assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
432         Assert.assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
433         Assert.assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
434         Assert.assertTrue("e not NaN", Double.isNaN(regression.getR()));
435         Assert.assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
436         Assert.assertTrue( "RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
437         Assert.assertTrue("SSE not NaN",Double.isNaN(regression.getSumSquaredErrors()));
438         Assert.assertTrue("SSTO not NaN", Double.isNaN(regression.getTotalSumSquares()));
439         Assert.assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
440 
441         regression.addData(1, 2);
442         regression.addData(1, 3);
443 
444         // No x variation, so these should still blow...
445         Assert.assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
446         Assert.assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
447         Assert.assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
448         Assert.assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
449         Assert.assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
450         Assert.assertTrue("e not NaN", Double.isNaN(regression.getR()));
451         Assert.assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
452         Assert.assertTrue("RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
453         Assert.assertTrue("SSE not NaN", Double.isNaN(regression.getSumSquaredErrors()));
454         Assert.assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
455 
456         // but SSTO should be OK
457         Assert.assertFalse("SSTO NaN", Double.isNaN(regression.getTotalSumSquares()));
458 
459         regression = new SimpleRegression();
460 
461         regression.addData(1, 2);
462         regression.addData(3, 3);
463 
464         // All should be OK except MSE, s(b0), s(b1) which need one more df
465         Assert.assertFalse("interceptNaN", Double.isNaN(regression.getIntercept()));
466         Assert.assertFalse("slope NaN", Double.isNaN(regression.getSlope()));
467         Assert.assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
468         Assert.assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
469         Assert.assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
470         Assert.assertFalse("r NaN", Double.isNaN(regression.getR()));
471         Assert.assertFalse("r-square NaN", Double.isNaN(regression.getRSquare()));
472         Assert.assertFalse("RSS NaN", Double.isNaN(regression.getRegressionSumSquares()));
473         Assert.assertFalse("SSE NaN", Double.isNaN(regression.getSumSquaredErrors()));
474         Assert.assertFalse("SSTO NaN", Double.isNaN(regression.getTotalSumSquares()));
475         Assert.assertFalse("predict NaN", Double.isNaN(regression.predict(0)));
476 
477         regression.addData(1, 4);
478 
479         // MSE, MSE, s(b0), s(b1) should all be OK now
480         Assert.assertFalse("MSE NaN", Double.isNaN(regression.getMeanSquareError()));
481         Assert.assertFalse("slope std err NaN", Double.isNaN(regression.getSlopeStdErr()));
482         Assert.assertFalse("intercept std err NaN", Double.isNaN(regression.getInterceptStdErr()));
483     }
484 
485     @Test
486     public void testClear() {
487         SimpleRegression regression = new SimpleRegression();
488         regression.addData(corrData);
489         Assert.assertEquals("number of observations", 17, regression.getN());
490         regression.clear();
491         Assert.assertEquals("number of observations", 0, regression.getN());
492         regression.addData(corrData);
493         Assert.assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
494         regression.addData(data);
495         Assert.assertEquals("number of observations", 53, regression.getN());
496     }
497 
498     @Test
499     public void testInference() {
500         //----------  verified against R, version 1.8.1 -----
501         // infData
502         SimpleRegression regression = new SimpleRegression();
503         regression.addData(infData);
504         Assert.assertEquals("slope std err", 0.011448491,
505                 regression.getSlopeStdErr(), 1E-10);
506         Assert.assertEquals("std err intercept", 0.286036932,
507                 regression.getInterceptStdErr(),1E-8);
508         Assert.assertEquals("significance", 4.596e-07,
509                 regression.getSignificance(),1E-8);
510         Assert.assertEquals("slope conf interval half-width", 0.0270713794287,
511                 regression.getSlopeConfidenceInterval(),1E-8);
512         // infData2
513         regression = new SimpleRegression();
514         regression.addData(infData2);
515         Assert.assertEquals("slope std err", 1.07260253,
516                 regression.getSlopeStdErr(), 1E-8);
517         Assert.assertEquals("std err intercept",4.17718672,
518                 regression.getInterceptStdErr(),1E-8);
519         Assert.assertEquals("significance", 0.261829133982,
520                 regression.getSignificance(),1E-11);
521         Assert.assertEquals("slope conf interval half-width", 2.97802204827,
522                 regression.getSlopeConfidenceInterval(),1E-8);
523         //------------- End R-verified tests -------------------------------
524 
525         //FIXME: get a real example to test against with alpha = .01
526         Assert.assertTrue("tighter means wider",
527                 regression.getSlopeConfidenceInterval() < regression.getSlopeConfidenceInterval(0.01));
528 
529         try {
530             regression.getSlopeConfidenceInterval(1);
531             Assert.fail("expecting MathIllegalArgumentException for alpha = 1");
532         } catch (MathIllegalArgumentException ex) {
533             // ignored
534         }
535     }
536 
537     @Test
538     public void testPerfect() {
539         SimpleRegression regression = new SimpleRegression();
540         int n = 100;
541         for (int i = 0; i < n; i++) {
542             regression.addData(((double) i) / (n - 1), i);
543         }
544         Assert.assertEquals(0.0, regression.getSignificance(), 1.0e-5);
545         Assert.assertTrue(regression.getSlope() > 0.0);
546         Assert.assertTrue(regression.getSumSquaredErrors() >= 0.0);
547     }
548 
549     @Test
550     public void testPerfect2() {
551         SimpleRegression regression = new SimpleRegression();
552         regression.addData(0, 0);
553         regression.addData(1, 1);
554         regression.addData(2, 2);
555         Assert.assertEquals(0.0, regression.getSlopeStdErr(), 0.0);
556         Assert.assertEquals(0.0, regression.getSignificance(), Double.MIN_VALUE);
557         Assert.assertEquals(1, regression.getRSquare(), Double.MIN_VALUE);
558     }
559 
560     @Test
561     public void testPerfectNegative() {
562         SimpleRegression regression = new SimpleRegression();
563         int n = 100;
564         for (int i = 0; i < n; i++) {
565             regression.addData(- ((double) i) / (n - 1), i);
566         }
567 
568         Assert.assertEquals(0.0, regression.getSignificance(), 1.0e-5);
569         Assert.assertTrue(regression.getSlope() < 0.0);
570     }
571 
572     @Test
573     public void testRandom() {
574         SimpleRegression regression = new SimpleRegression();
575         Random random = new Random(1);
576         int n = 100;
577         for (int i = 0; i < n; i++) {
578             regression.addData(((double) i) / (n - 1), random.nextDouble());
579         }
580 
581         Assert.assertTrue( 0.0 < regression.getSignificance()
582                     && regression.getSignificance() < 1.0);
583     }
584 
585 
586     // Jira MATH-85 = Bugzilla 39432
587     @Test
588     public void testSSENonNegative() {
589         double[] y = { 8915.102, 8919.302, 8923.502 };
590         double[] x = { 1.107178495E2, 1.107264895E2, 1.107351295E2 };
591         SimpleRegression reg = new SimpleRegression();
592         for (int i = 0; i < x.length; i++) {
593             reg.addData(x[i], y[i]);
594         }
595         Assert.assertTrue(reg.getSumSquaredErrors() >= 0.0);
596     }
597 
598     // Test remove X,Y (single observation)
599     @Test
600     public void testRemoveXY() {
601         // Create regression with inference data then remove to test
602         SimpleRegression regression = new SimpleRegression();
603         regression.addData(infData);
604         regression.removeData(removeX, removeY);
605         regression.addData(removeX, removeY);
606         // Use the inference assertions to make sure that everything worked
607         Assert.assertEquals("slope std err", 0.011448491,
608                 regression.getSlopeStdErr(), 1E-10);
609         Assert.assertEquals("std err intercept", 0.286036932,
610                 regression.getInterceptStdErr(),1E-8);
611         Assert.assertEquals("significance", 4.596e-07,
612                 regression.getSignificance(),1E-8);
613         Assert.assertEquals("slope conf interval half-width", 0.0270713794287,
614                 regression.getSlopeConfidenceInterval(),1E-8);
615      }
616 
617 
618     // Test remove single observation in array
619     @Test
620     public void testRemoveSingle() {
621         // Create regression with inference data then remove to test
622         SimpleRegression regression = new SimpleRegression();
623         regression.addData(infData);
624         regression.removeData(removeSingle);
625         regression.addData(removeSingle);
626         // Use the inference assertions to make sure that everything worked
627         Assert.assertEquals("slope std err", 0.011448491,
628                 regression.getSlopeStdErr(), 1E-10);
629         Assert.assertEquals("std err intercept", 0.286036932,
630                 regression.getInterceptStdErr(),1E-8);
631         Assert.assertEquals("significance", 4.596e-07,
632                 regression.getSignificance(),1E-8);
633         Assert.assertEquals("slope conf interval half-width", 0.0270713794287,
634                 regression.getSlopeConfidenceInterval(),1E-8);
635      }
636 
637     // Test remove multiple observations
638     @Test
639     public void testRemoveMultiple() {
640         // Create regression with inference data then remove to test
641         SimpleRegression regression = new SimpleRegression();
642         regression.addData(infData);
643         regression.removeData(removeMultiple);
644         regression.addData(removeMultiple);
645         // Use the inference assertions to make sure that everything worked
646         Assert.assertEquals("slope std err", 0.011448491,
647                 regression.getSlopeStdErr(), 1E-10);
648         Assert.assertEquals("std err intercept", 0.286036932,
649                 regression.getInterceptStdErr(),1E-8);
650         Assert.assertEquals("significance", 4.596e-07,
651                 regression.getSignificance(),1E-8);
652         Assert.assertEquals("slope conf interval half-width", 0.0270713794287,
653                 regression.getSlopeConfidenceInterval(),1E-8);
654      }
655 
656     // Remove observation when empty
657     @Test
658     public void testRemoveObsFromEmpty() {
659         SimpleRegression regression = new SimpleRegression();
660         regression.removeData(removeX, removeY);
661         Assert.assertEquals(regression.getN(), 0);
662     }
663 
664     // Remove single observation to empty
665     @Test
666     public void testRemoveObsFromSingle() {
667         SimpleRegression regression = new SimpleRegression();
668         regression.addData(removeX, removeY);
669         regression.removeData(removeX, removeY);
670         Assert.assertEquals(regression.getN(), 0);
671     }
672 
673     // Remove multiple observations to empty
674     @Test
675     public void testRemoveMultipleToEmpty() {
676         SimpleRegression regression = new SimpleRegression();
677         regression.addData(removeMultiple);
678         regression.removeData(removeMultiple);
679         Assert.assertEquals(regression.getN(), 0);
680     }
681 
682     // Remove multiple observations past empty (i.e. size of array > n)
683     @Test
684     public void testRemoveMultiplePastEmpty() {
685         SimpleRegression regression = new SimpleRegression();
686         regression.addData(removeX, removeY);
687         regression.removeData(removeMultiple);
688         Assert.assertEquals(regression.getN(), 0);
689     }
690 }