/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.commons.math4.legacy.stat.correlation;
19  
20  import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
21  import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
22  import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
23  import org.apache.commons.math4.legacy.linear.BlockRealMatrix;
24  import org.apache.commons.math4.legacy.linear.RealMatrix;
25  import org.apache.commons.math4.legacy.stat.ranking.NaNStrategy;
26  import org.apache.commons.math4.legacy.stat.ranking.NaturalRanking;
27  import org.apache.commons.math4.legacy.stat.ranking.RankingAlgorithm;
28  
29  /**
30   * Spearman's rank correlation. This implementation performs a rank
31   * transformation on the input data and then computes {@link PearsonsCorrelation}
32   * on the ranked data.
33   * <p>
34   * By default, ranks are computed using {@link NaturalRanking} with default
35   * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
36   * The ranking algorithm can be set using a constructor argument.
37   *
38   * @since 2.0
39   */
40  public class SpearmansCorrelation {
41  
42      /** Input data. */
43      private final RealMatrix data;
44  
45      /** Ranking algorithm.  */
46      private final RankingAlgorithm rankingAlgorithm;
47  
48      /** Rank correlation. */
49      private final PearsonsCorrelation rankCorrelation;
50  
51      /**
52       * Create a SpearmansCorrelation without data.
53       */
54      public SpearmansCorrelation() {
55          this(new NaturalRanking());
56      }
57  
58      /**
59       * Create a SpearmansCorrelation with the given ranking algorithm.
60       *
61       * @param rankingAlgorithm ranking algorithm
62       * @throws MathIllegalArgumentException if the provided {@link RankingAlgorithm} is of
63       * type {@link NaturalRanking} and uses a {@link NaNStrategy#REMOVED} strategy
64       * @since 3.1
65       */
66      public SpearmansCorrelation(final RankingAlgorithm rankingAlgorithm)
67          throws MathIllegalArgumentException {
68  
69          if (rankingAlgorithm instanceof NaturalRanking &&
70              NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
71              throw new MathIllegalArgumentException(LocalizedFormats.NOT_SUPPORTED_NAN_STRATEGY,
72                                                     NaNStrategy.REMOVED);
73          }
74  
75          data = null;
76          this.rankingAlgorithm = rankingAlgorithm;
77          rankCorrelation = null;
78      }
79  
80      /**
81       * Create a SpearmansCorrelation from the given data matrix.
82       *
83       * @param dataMatrix matrix of data with columns representing
84       * variables to correlate
85       */
86      public SpearmansCorrelation(final RealMatrix dataMatrix) {
87          this(dataMatrix, new NaturalRanking());
88      }
89  
90      /**
91       * Create a SpearmansCorrelation with the given input data matrix
92       * and ranking algorithm.
93       *
94       * @param dataMatrix matrix of data with columns representing
95       * variables to correlate
96       * @param rankingAlgorithm ranking algorithm
97       * @throws MathIllegalArgumentException if the provided {@link RankingAlgorithm} is of
98       * type {@link NaturalRanking} and uses a {@link NaNStrategy#REMOVED} strategy
99       */
100     public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm)
101         throws MathIllegalArgumentException {
102 
103         if (rankingAlgorithm instanceof NaturalRanking &&
104             NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
105             throw new MathIllegalArgumentException(LocalizedFormats.NOT_SUPPORTED_NAN_STRATEGY,
106                                                    NaNStrategy.REMOVED);
107         }
108 
109         this.rankingAlgorithm = rankingAlgorithm;
110         this.data = rankTransform(dataMatrix);
111         rankCorrelation = new PearsonsCorrelation(data);
112     }
113 
114     /**
115      * Calculate the Spearman Rank Correlation Matrix.
116      *
117      * @return Spearman Rank Correlation Matrix
118      * @throws NullPointerException if this instance was created with no data.
119      */
120     public RealMatrix getCorrelationMatrix() {
121         return rankCorrelation.getCorrelationMatrix();
122     }
123 
124     /**
125      * Returns a {@link PearsonsCorrelation} instance constructed from the
126      * ranked input data. That is,
127      * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
128      * is equivalent to
129      * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
130      * <code>rankTransform(matrix)</code> is the result of applying the
131      * configured <code>RankingAlgorithm</code> to each of the columns of
132      * <code>matrix.</code>
133      *
134      * <p>Returns null if this instance was created with no data.</p>
135      *
136      * @return PearsonsCorrelation among ranked column data
137      */
138     public PearsonsCorrelation getRankCorrelation() {
139         return rankCorrelation;
140     }
141 
142     /**
143      * Computes the Spearman's rank correlation matrix for the columns of the
144      * input matrix.
145      *
146      * @param matrix matrix with columns representing variables to correlate
147      * @return correlation matrix
148      */
149     public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
150         final RealMatrix matrixCopy = rankTransform(matrix);
151         return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
152     }
153 
154     /**
155      * Computes the Spearman's rank correlation matrix for the columns of the
156      * input rectangular array.  The columns of the array represent values
157      * of variables to be correlated.
158      *
159      * @param matrix matrix with columns representing variables to correlate
160      * @return correlation matrix
161      */
162     public RealMatrix computeCorrelationMatrix(final double[][] matrix) {
163        return computeCorrelationMatrix(new BlockRealMatrix(matrix));
164     }
165 
166     /**
167      * Computes the Spearman's rank correlation coefficient between the two arrays.
168      *
169      * @param xArray first data array
170      * @param yArray second data array
171      * @return Returns Spearman's rank correlation coefficient for the two arrays
172      * @throws DimensionMismatchException if the arrays lengths do not match
173      * @throws MathIllegalArgumentException if the array length is less than 2
174      */
175     public double correlation(final double[] xArray, final double[] yArray) {
176         if (xArray.length != yArray.length) {
177             throw new DimensionMismatchException(xArray.length, yArray.length);
178         } else if (xArray.length < 2) {
179             throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION,
180                                                    xArray.length, 2);
181         } else {
182             return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
183                                                          rankingAlgorithm.rank(yArray));
184         }
185     }
186 
187     /**
188      * Applies rank transform to each of the columns of <code>matrix</code>
189      * using the current <code>rankingAlgorithm</code>.
190      *
191      * @param matrix matrix to transform
192      * @return a rank-transformed matrix
193      */
194     private RealMatrix rankTransform(final RealMatrix matrix) {
195         RealMatrix transformed = matrix.copy();
196         for (int i = 0; i < transformed.getColumnDimension(); i++) {
197             transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i)));
198         }
199 
200         return transformed;
201     }
202 }