View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.regression;
18  
19  import java.util.function.Supplier;
20  
21  import org.junit.Assert;
22  import org.junit.Before;
23  import org.junit.Test;
24  
25  import org.apache.commons.rng.UniformRandomProvider;
26  import org.apache.commons.rng.simple.RandomSource;
27  import org.apache.commons.statistics.distribution.ContinuousDistribution;
28  import org.apache.commons.statistics.distribution.NormalDistribution;
29  import org.apache.commons.math4.legacy.TestUtils;
30  import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
31  import org.apache.commons.math4.legacy.exception.NullArgumentException;
32  import org.apache.commons.math4.legacy.linear.MatrixUtils;
33  import org.apache.commons.math4.legacy.linear.RealMatrix;
34  import org.apache.commons.math4.legacy.linear.RealVector;
35  import org.apache.commons.math4.legacy.random.CorrelatedVectorFactory;
36  import org.apache.commons.math4.legacy.stat.correlation.Covariance;
37  import org.apache.commons.math4.legacy.stat.descriptive.DescriptiveStatistics;
38  
39  public class GLSMultipleLinearRegressionTest extends MultipleLinearRegressionAbstractTest {
40  
41      private double[] y;
42      private double[][] x;
43      private double[][] omega;
44      private final double[] longley = new double[] {
45              60323,83.0,234289,2356,1590,107608,1947,
46              61122,88.5,259426,2325,1456,108632,1948,
47              60171,88.2,258054,3682,1616,109773,1949,
48              61187,89.5,284599,3351,1650,110929,1950,
49              63221,96.2,328975,2099,3099,112075,1951,
50              63639,98.1,346999,1932,3594,113270,1952,
51              64989,99.0,365385,1870,3547,115094,1953,
52              63761,100.0,363112,3578,3350,116219,1954,
53              66019,101.2,397469,2904,3048,117388,1955,
54              67857,104.6,419180,2822,2857,118734,1956,
55              68169,108.4,442769,2936,2798,120445,1957,
56              66513,110.8,444546,4681,2637,121950,1958,
57              68655,112.6,482704,3813,2552,123366,1959,
58              69564,114.2,502601,3931,2514,125368,1960,
59              69331,115.7,518173,4806,2572,127852,1961,
60              70551,116.9,554894,4007,2827,130081,1962
61          };
62  
63      @Before
64      @Override
65      public void setUp(){
66          y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
67          x = new double[6][];
68          x[0] = new double[]{0, 0, 0, 0, 0};
69          x[1] = new double[]{2.0, 0, 0, 0, 0};
70          x[2] = new double[]{0, 3.0, 0, 0, 0};
71          x[3] = new double[]{0, 0, 4.0, 0, 0};
72          x[4] = new double[]{0, 0, 0, 5.0, 0};
73          x[5] = new double[]{0, 0, 0, 0, 6.0};
74          omega = new double[6][];
75          omega[0] = new double[]{1.0, 0, 0, 0, 0, 0};
76          omega[1] = new double[]{0, 2.0, 0, 0, 0, 0};
77          omega[2] = new double[]{0, 0, 3.0, 0, 0, 0};
78          omega[3] = new double[]{0, 0, 0, 4.0, 0, 0};
79          omega[4] = new double[]{0, 0, 0, 0, 5.0, 0};
80          omega[5] = new double[]{0, 0, 0, 0, 0, 6.0};
81          super.setUp();
82      }
83  
84      @Test(expected=NullArgumentException.class)
85      public void cannotAddXSampleData() {
86          createRegression().newSampleData(new double[]{}, null, null);
87      }
88  
89      @Test(expected=NullArgumentException.class)
90      public void cannotAddNullYSampleData() {
91          createRegression().newSampleData(null, new double[][]{}, null);
92      }
93  
94      @Test(expected=MathIllegalArgumentException.class)
95      public void cannotAddSampleDataWithSizeMismatch() {
96          double[] y = new double[]{1.0, 2.0};
97          double[][] x = new double[1][];
98          x[0] = new double[]{1.0, 0};
99          createRegression().newSampleData(y, x, null);
100     }
101 
102     @Test(expected=MathIllegalArgumentException.class)
103     public void cannotAddNullCovarianceData() {
104         createRegression().newSampleData(new double[]{}, new double[][]{}, null);
105     }
106 
107     @Test(expected=MathIllegalArgumentException.class)
108     public void notEnoughData() {
109         double[]   reducedY = new double[y.length - 1];
110         double[][] reducedX = new double[x.length - 1][];
111         double[][] reducedO = new double[omega.length - 1][];
112         System.arraycopy(y,     0, reducedY, 0, reducedY.length);
113         System.arraycopy(x,     0, reducedX, 0, reducedX.length);
114         System.arraycopy(omega, 0, reducedO, 0, reducedO.length);
115         createRegression().newSampleData(reducedY, reducedX, reducedO);
116     }
117 
118     @Test(expected=MathIllegalArgumentException.class)
119     public void cannotAddCovarianceDataWithSampleSizeMismatch() {
120         double[] y = new double[]{1.0, 2.0};
121         double[][] x = new double[2][];
122         x[0] = new double[]{1.0, 0};
123         x[1] = new double[]{0, 1.0};
124         double[][] omega = new double[1][];
125         omega[0] = new double[]{1.0, 0};
126         createRegression().newSampleData(y, x, omega);
127     }
128 
129     @Test(expected=MathIllegalArgumentException.class)
130     public void cannotAddCovarianceDataThatIsNotSquare() {
131         double[] y = new double[]{1.0, 2.0};
132         double[][] x = new double[2][];
133         x[0] = new double[]{1.0, 0};
134         x[1] = new double[]{0, 1.0};
135         double[][] omega = new double[3][];
136         omega[0] = new double[]{1.0, 0};
137         omega[1] = new double[]{0, 1.0};
138         omega[2] = new double[]{0, 2.0};
139         createRegression().newSampleData(y, x, omega);
140     }
141 
142     @Override
143     protected GLSMultipleLinearRegression createRegression() {
144         GLSMultipleLinearRegression regression = new GLSMultipleLinearRegression();
145         regression.newSampleData(y, x, omega);
146         return regression;
147     }
148 
149     @Override
150     protected int getNumberOfRegressors() {
151         return x[0].length + 1;
152     }
153 
154     @Override
155     protected int getSampleSize() {
156         return y.length;
157     }
158 
159     /**
160      * test calculateYVariance
161      */
162     @Test
163     public void testYVariance() {
164 
165         // assumes: y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
166 
167         GLSMultipleLinearRegression model = new GLSMultipleLinearRegression();
168         model.newSampleData(y, x, omega);
169         TestUtils.assertEquals(model.calculateYVariance(), 3.5, 0);
170     }
171 
172     /**
173      * Verifies that setting X, Y and covariance separately has the same effect as newSample(X,Y,cov).
174      */
175     @Test
176     public void testNewSample2() {
177         double[] y = new double[] {1, 2, 3, 4};
178         double[][] x = new double[][] {
179           {19, 22, 33},
180           {20, 30, 40},
181           {25, 35, 45},
182           {27, 37, 47}
183         };
184         double[][] covariance = MatrixUtils.createRealIdentityMatrix(4).scalarMultiply(2).getData();
185         GLSMultipleLinearRegression regression = new GLSMultipleLinearRegression();
186         regression.newSampleData(y, x, covariance);
187         RealMatrix combinedX = regression.getX().copy();
188         RealVector combinedY = regression.getY().copy();
189         RealMatrix combinedCovInv = regression.getOmegaInverse();
190         regression.newXSampleData(x);
191         regression.newYSampleData(y);
192         Assert.assertEquals(combinedX, regression.getX());
193         Assert.assertEquals(combinedY, regression.getY());
194         Assert.assertEquals(combinedCovInv, regression.getOmegaInverse());
195     }
196 
197     /**
198      * Verifies that GLS with identity covariance matrix gives the same results
199      * as OLS.
200      */
201     @Test
202     public void testGLSOLSConsistency() {
203         RealMatrix identityCov = MatrixUtils.createRealIdentityMatrix(16);
204         GLSMultipleLinearRegression glsModel = new GLSMultipleLinearRegression();
205         OLSMultipleLinearRegression olsModel = new OLSMultipleLinearRegression();
206         glsModel.newSampleData(longley, 16, 6);
207         olsModel.newSampleData(longley, 16, 6);
208         glsModel.newCovarianceData(identityCov.getData());
209         double[] olsBeta = olsModel.calculateBeta().toArray();
210         double[] glsBeta = glsModel.calculateBeta().toArray();
211         // TODO:  Should have assertRelativelyEquals(double[], double[], eps) in TestUtils
212         //        Should also add RealVector and RealMatrix versions
213         for (int i = 0; i < olsBeta.length; i++) {
214             TestUtils.assertRelativelyEquals(olsBeta[i], glsBeta[i], 10E-7);
215         }
216     }
217 
218     /**
219      * Generate an error covariance matrix and sample data representing models
220      * with this error structure. Then verify that GLS estimated coefficients,
221      * on average, perform better than OLS.
222      */
223     @Test
224     public void testGLSEfficiency() {
225         final UniformRandomProvider rg = RandomSource.MT.create();
226         final ContinuousDistribution.Sampler gauss = NormalDistribution.of(0, 1).createSampler(rg);
227 
228         // Assume model has 16 observations (will use Longley data).  Start by generating
229         // non-constant variances for the 16 error terms.
230         final int nObs = 16;
231         double[] sigma = new double[nObs];
232         for (int i = 0; i < nObs; i++) {
233             sigma[i] = 10 * rg.nextDouble();
234         }
235 
236         // Now generate 1000 error vectors to use to estimate the covariance matrix
237         // Columns are draws on N(0, sigma[col])
238         final int numSeeds = 1000;
239         RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
240         for (int i = 0; i < numSeeds; i++) {
241             for (int j = 0; j < nObs; j++) {
242                 errorSeeds.setEntry(i, j, gauss.sample() * sigma[j]);
243             }
244         }
245 
246         // Get covariance matrix for columns
247         RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
248 
249         // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
250         final Supplier<double[]> gen
251             = new CorrelatedVectorFactory(cov, 1e-12 * cov.getNorm()).gaussian(rg);
252 
253         // Now start generating models.  Use Longley X matrix on LHS
254         // and Longley OLS beta vector as "true" beta.  Generate
255         // Y values by XB + u where u is a CorrelatedRandomVector generated
256         // from cov.
257         OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
258         ols.newSampleData(longley, nObs, 6);
259         final RealVector b = ols.calculateBeta().copy();
260         final RealMatrix x = ols.getX().copy();
261 
262         // Create a GLS model to reuse
263         GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
264         gls.newSampleData(longley, nObs, 6);
265         gls.newCovarianceData(cov.getData());
266 
267         // Create aggregators for stats measuring model performance
268         DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
269         DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
270 
271         // Generate Y vectors for 10000 models, estimate GLS and OLS and
272         // Verify that OLS estimates are better
273         final int nModels = 10000;
274         for (int i = 0; i < nModels; i++) {
275 
276             // Generate y = xb + u with u cov
277             RealVector u = MatrixUtils.createRealVector(gen.get());
278             double[] y = u.add(x.operate(b)).toArray();
279 
280             // Estimate OLS parameters
281             ols.newYSampleData(y);
282             RealVector olsBeta = ols.calculateBeta();
283 
284             // Estimate GLS parameters
285             gls.newYSampleData(y);
286             RealVector glsBeta = gls.calculateBeta();
287 
288             // Record deviations from "true" beta
289             double dist = olsBeta.getDistance(b);
290             olsBetaStats.addValue(dist * dist);
291             dist = glsBeta.getDistance(b);
292             glsBetaStats.addValue(dist * dist);
293         }
294 
295         // Verify that GLS is on average more efficient, lower variance
296         assert olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean();
297         assert olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation();
298     }
299 }