View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.correlation;
18  
19  import org.apache.commons.math4.legacy.TestUtils;
20  import org.apache.commons.statistics.distribution.TDistribution;
21  import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
22  import org.apache.commons.math4.legacy.linear.BlockRealMatrix;
23  import org.apache.commons.math4.legacy.linear.RealMatrix;
24  import org.apache.commons.math4.core.jdkmath.JdkMath;
25  import org.junit.Assert;
26  import org.junit.Test;
27  
28  
29  public class PearsonsCorrelationTest {
30  
31      protected final double[] longleyData = new double[] {
32              60323,83.0,234289,2356,1590,107608,1947,
33              61122,88.5,259426,2325,1456,108632,1948,
34              60171,88.2,258054,3682,1616,109773,1949,
35              61187,89.5,284599,3351,1650,110929,1950,
36              63221,96.2,328975,2099,3099,112075,1951,
37              63639,98.1,346999,1932,3594,113270,1952,
38              64989,99.0,365385,1870,3547,115094,1953,
39              63761,100.0,363112,3578,3350,116219,1954,
40              66019,101.2,397469,2904,3048,117388,1955,
41              67857,104.6,419180,2822,2857,118734,1956,
42              68169,108.4,442769,2936,2798,120445,1957,
43              66513,110.8,444546,4681,2637,121950,1958,
44              68655,112.6,482704,3813,2552,123366,1959,
45              69564,114.2,502601,3931,2514,125368,1960,
46              69331,115.7,518173,4806,2572,127852,1961,
47              70551,116.9,554894,4007,2827,130081,1962
48          };
49  
50      protected final double[] swissData = new double[] {
51              80.2,17.0,15,12,9.96,
52              83.1,45.1,6,9,84.84,
53              92.5,39.7,5,5,93.40,
54              85.8,36.5,12,7,33.77,
55              76.9,43.5,17,15,5.16,
56              76.1,35.3,9,7,90.57,
57              83.8,70.2,16,7,92.85,
58              92.4,67.8,14,8,97.16,
59              82.4,53.3,12,7,97.67,
60              82.9,45.2,16,13,91.38,
61              87.1,64.5,14,6,98.61,
62              64.1,62.0,21,12,8.52,
63              66.9,67.5,14,7,2.27,
64              68.9,60.7,19,12,4.43,
65              61.7,69.3,22,5,2.82,
66              68.3,72.6,18,2,24.20,
67              71.7,34.0,17,8,3.30,
68              55.7,19.4,26,28,12.11,
69              54.3,15.2,31,20,2.15,
70              65.1,73.0,19,9,2.84,
71              65.5,59.8,22,10,5.23,
72              65.0,55.1,14,3,4.52,
73              56.6,50.9,22,12,15.14,
74              57.4,54.1,20,6,4.20,
75              72.5,71.2,12,1,2.40,
76              74.2,58.1,14,8,5.23,
77              72.0,63.5,6,3,2.56,
78              60.5,60.8,16,10,7.72,
79              58.3,26.8,25,19,18.46,
80              65.4,49.5,15,8,6.10,
81              75.5,85.9,3,2,99.71,
82              69.3,84.9,7,6,99.68,
83              77.3,89.7,5,2,100.00,
84              70.5,78.2,12,6,98.96,
85              79.4,64.9,7,3,98.22,
86              65.0,75.9,9,9,99.06,
87              92.2,84.6,3,3,99.46,
88              79.3,63.1,13,13,96.83,
89              70.4,38.4,26,12,5.62,
90              65.7,7.7,29,11,13.79,
91              72.7,16.7,22,13,11.22,
92              64.4,17.6,35,32,16.92,
93              77.6,37.6,15,7,4.97,
94              67.6,18.7,25,7,8.65,
95              35.0,1.2,37,53,42.34,
96              44.7,46.6,16,29,50.43,
97              42.8,27.7,22,29,58.33
98          };
99  
100 
101     /**
102      * Test Longley dataset against R.
103      */
104     @Test
105     public void testLongley() {
106         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
107         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
108         RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
109         double[] rData = new double[] {
110                 1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942,
111                 0.4573073999764817, 0.960390571594376, 0.9713294591921188,
112                 0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966,
113                 0.4647441876006747, 0.979163432977498, 0.9911491900672053,
114                 0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580,
115                 0.4464367918926265, 0.991090069458478, 0.9952734837647849,
116                 0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000,
117                 -0.1774206295018783, 0.686551516365312, 0.6682566045621746,
118                 0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783,
119                 1.0000000000000000, 0.364416267189032, 0.4172451498349454,
120                 0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120,
121                 0.3644162671890320, 1.000000000000000, 0.9939528462329257,
122                 0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746,
123                 0.4172451498349454, 0.993952846232926, 1.0000000000000000
124         };
125         TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15);
126 
127         double[] rPvalues = new double[] {
128                 4.38904690369668e-10,
129                 8.36353208910623e-12, 7.8159700933611e-14,
130                 0.0472894097790304, 0.01030636128354301, 0.01316878049026582,
131                 0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452,
132                 3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516, 0.1652293725106684,
133                 3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15
134         };
135         RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 7);
136         fillUpper(rPMatrix, 0d);
137         TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
138     }
139 
140     /**
141      * Test R Swiss fertility dataset against R.
142      */
143     @Test
144     public void testSwissFertility() {
145          RealMatrix matrix = createRealMatrix(swissData, 47, 5);
146          PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
147          RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
148          double[] rData = new double[] {
149                1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691,  0.4636847006517939,
150                  0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398,
151                 -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666,
152                 -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148,
153                  0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000
154          };
155          TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15);
156 
157          double[] rPvalues = new double[] {
158                  0.01491720061472623,
159                  9.45043734069043e-07, 9.95151527133974e-08,
160                  3.658616965962355e-07, 1.304590105694471e-06, 4.811397236181847e-08,
161                  0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683
162          };
163          RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 5);
164          fillUpper(rPMatrix, 0d);
165          TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
166     }
167 
168     /**
169      * Test p-value near 0. JIRA: MATH-371
170      */
171     @Test
172     public void testPValueNearZero() {
173         /*
174          * Create a dataset that has r -> 1, p -> 0 as dimension increases.
175          * Prior to the fix for MATH-371, p vanished for dimension >= 14.
176          * Post fix, p-values diminish smoothly, vanishing at dimension = 127.
177          * Tested value is ~1E-303.
178          */
179         int dimension = 120;
180         double[][] data = new double[dimension][2];
181         for (int i = 0; i < dimension; i++) {
182             data[i][0] = i;
183             data[i][1] = i + 1/((double)i + 1);
184         }
185         PearsonsCorrelation corrInstance = new PearsonsCorrelation(data);
186         Assert.assertTrue(corrInstance.getCorrelationPValues().getEntry(0, 1) > 0);
187     }
188 
189 
190     /**
191      * Constant column
192      */
193     @Test
194     public void testConstant() {
195         double[] noVariance = new double[] {1, 1, 1, 1};
196         double[] values = new double[] {1, 2, 3, 4};
197         Assert.assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values)));
198         Assert.assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(values, noVariance)));
199     }
200 
201 
202     /**
203      * Insufficient data
204      */
205 
206     @Test
207     public void testInsufficientData() {
208         double[] one = new double[] {1};
209         double[] two = new double[] {2};
210         try {
211             new PearsonsCorrelation().correlation(one, two);
212             Assert.fail("Expecting MathIllegalArgumentException");
213         } catch (MathIllegalArgumentException ex) {
214             // Expected
215         }
216         RealMatrix matrix = new BlockRealMatrix(new double[][] {{0},{1}});
217         try {
218             new PearsonsCorrelation(matrix);
219             Assert.fail("Expecting MathIllegalArgumentException");
220         } catch (MathIllegalArgumentException ex) {
221             // Expected
222         }
223     }
224 
225     /**
226      * Verify that direct t-tests using standard error estimates are consistent
227      * with reported p-values
228      */
229     @Test
230     public void testStdErrorConsistency() {
231         TDistribution tDistribution = TDistribution.of(45);
232         RealMatrix matrix = createRealMatrix(swissData, 47, 5);
233         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
234         RealMatrix rValues = corrInstance.getCorrelationMatrix();
235         RealMatrix pValues = corrInstance.getCorrelationPValues();
236         RealMatrix stdErrors = corrInstance.getCorrelationStandardErrors();
237         for (int i = 0; i < 5; i++) {
238             for (int j = 0; j < i; j++) {
239                 double t = JdkMath.abs(rValues.getEntry(i, j)) / stdErrors.getEntry(i, j);
240                 double p = 2 * tDistribution.survivalProbability(t);
241                 Assert.assertEquals(p, pValues.getEntry(i, j), 10E-15);
242             }
243         }
244     }
245 
246     /**
247      * Verify that creating correlation from covariance gives same results as
248      * direct computation from the original matrix
249      */
250     @Test
251     public void testCovarianceConsistency() {
252         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
253         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
254         Covariance covInstance = new Covariance(matrix);
255         PearsonsCorrelation corrFromCovInstance = new PearsonsCorrelation(covInstance);
256         TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
257                 corrFromCovInstance.getCorrelationMatrix(), 10E-15);
258         TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
259                 corrFromCovInstance.getCorrelationPValues(), 10E-15);
260         TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
261                 corrFromCovInstance.getCorrelationStandardErrors(), 10E-15);
262 
263         PearsonsCorrelation corrFromCovInstance2 =
264             new PearsonsCorrelation(covInstance.getCovarianceMatrix(), 16);
265         TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
266                 corrFromCovInstance2.getCorrelationMatrix(), 10E-15);
267         TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
268                 corrFromCovInstance2.getCorrelationPValues(), 10E-15);
269         TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
270                 corrFromCovInstance2.getCorrelationStandardErrors(), 10E-15);
271     }
272 
273 
274     @Test
275     public void testConsistency() {
276         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
277         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
278         double[][] data = matrix.getData();
279         double[] x = matrix.getColumn(0);
280         double[] y = matrix.getColumn(1);
281         Assert.assertEquals(new PearsonsCorrelation().correlation(x, y),
282                 corrInstance.getCorrelationMatrix().getEntry(0, 1), Double.MIN_VALUE);
283         TestUtils.assertEquals("Correlation matrix", corrInstance.getCorrelationMatrix(),
284                 new PearsonsCorrelation().computeCorrelationMatrix(data), Double.MIN_VALUE);
285     }
286 
287     protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
288         double[][] matrixData = new double[nRows][nCols];
289         int ptr = 0;
290         for (int i = 0; i < nRows; i++) {
291             System.arraycopy(data, ptr, matrixData[i], 0, nCols);
292             ptr += nCols;
293         }
294         return new BlockRealMatrix(matrixData);
295     }
296 
297     protected RealMatrix createLowerTriangularRealMatrix(double[] data, int dimension) {
298         int ptr = 0;
299         RealMatrix result = new BlockRealMatrix(dimension, dimension);
300         for (int i = 1; i < dimension; i++) {
301             for (int j = 0; j < i; j++) {
302                 result.setEntry(i, j, data[ptr]);
303                 ptr++;
304             }
305         }
306         return result;
307     }
308 
309     protected void fillUpper(RealMatrix matrix, double diagonalValue) {
310         int dimension = matrix.getColumnDimension();
311         for (int i = 0; i < dimension; i++) {
312             matrix.setEntry(i, i, diagonalValue);
313             for (int j = i+1; j < dimension; j++) {
314                 matrix.setEntry(i, j, matrix.getEntry(j, i));
315             }
316         }
317     }
318 }