001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math3.stat.correlation;
019
020 import java.util.ArrayList;
021 import java.util.HashSet;
022 import java.util.List;
023 import java.util.Set;
024
025 import org.apache.commons.math3.exception.DimensionMismatchException;
026 import org.apache.commons.math3.exception.MathIllegalArgumentException;
027 import org.apache.commons.math3.exception.util.LocalizedFormats;
028 import org.apache.commons.math3.linear.BlockRealMatrix;
029 import org.apache.commons.math3.linear.RealMatrix;
030 import org.apache.commons.math3.stat.ranking.NaNStrategy;
031 import org.apache.commons.math3.stat.ranking.NaturalRanking;
032 import org.apache.commons.math3.stat.ranking.RankingAlgorithm;
033
034 /**
035 * Spearman's rank correlation. This implementation performs a rank
036 * transformation on the input data and then computes {@link PearsonsCorrelation}
037 * on the ranked data.
038 * <p>
039 * By default, ranks are computed using {@link NaturalRanking} with default
040 * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
041 * The ranking algorithm can be set using a constructor argument.
042 *
043 * @since 2.0
044 * @version $Id: SpearmansCorrelation.java 1461822 2013-03-27 19:44:22Z tn $
045 */
046 public class SpearmansCorrelation {
047
048 /** Input data */
049 private final RealMatrix data;
050
051 /** Ranking algorithm */
052 private final RankingAlgorithm rankingAlgorithm;
053
054 /** Rank correlation */
055 private final PearsonsCorrelation rankCorrelation;
056
057 /**
058 * Create a SpearmansCorrelation without data.
059 */
060 public SpearmansCorrelation() {
061 this(new NaturalRanking());
062 }
063
064 /**
065 * Create a SpearmansCorrelation with the given ranking algorithm.
066 * <p>
067 * From version 4.0 onwards this constructor will throw an exception
068 * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy.
069 *
070 * @param rankingAlgorithm ranking algorithm
071 * @since 3.1
072 */
073 public SpearmansCorrelation(final RankingAlgorithm rankingAlgorithm) {
074 data = null;
075 this.rankingAlgorithm = rankingAlgorithm;
076 rankCorrelation = null;
077 }
078
079 /**
080 * Create a SpearmansCorrelation from the given data matrix.
081 *
082 * @param dataMatrix matrix of data with columns representing
083 * variables to correlate
084 */
085 public SpearmansCorrelation(final RealMatrix dataMatrix) {
086 this(dataMatrix, new NaturalRanking());
087 }
088
089 /**
090 * Create a SpearmansCorrelation with the given input data matrix
091 * and ranking algorithm.
092 * <p>
093 * From version 4.0 onwards this constructor will throw an exception
094 * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy.
095 *
096 * @param dataMatrix matrix of data with columns representing
097 * variables to correlate
098 * @param rankingAlgorithm ranking algorithm
099 */
100 public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
101 this.rankingAlgorithm = rankingAlgorithm;
102 this.data = rankTransform(dataMatrix);
103 rankCorrelation = new PearsonsCorrelation(data);
104 }
105
106 /**
107 * Calculate the Spearman Rank Correlation Matrix.
108 *
109 * @return Spearman Rank Correlation Matrix
110 */
111 public RealMatrix getCorrelationMatrix() {
112 return rankCorrelation.getCorrelationMatrix();
113 }
114
115 /**
116 * Returns a {@link PearsonsCorrelation} instance constructed from the
117 * ranked input data. That is,
118 * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
119 * is equivalent to
120 * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
121 * <code>rankTransform(matrix)</code> is the result of applying the
122 * configured <code>RankingAlgorithm</code> to each of the columns of
123 * <code>matrix.</code>
124 *
125 * @return PearsonsCorrelation among ranked column data
126 */
127 public PearsonsCorrelation getRankCorrelation() {
128 return rankCorrelation;
129 }
130
131 /**
132 * Computes the Spearman's rank correlation matrix for the columns of the
133 * input matrix.
134 *
135 * @param matrix matrix with columns representing variables to correlate
136 * @return correlation matrix
137 */
138 public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
139 final RealMatrix matrixCopy = rankTransform(matrix);
140 return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
141 }
142
143 /**
144 * Computes the Spearman's rank correlation matrix for the columns of the
145 * input rectangular array. The columns of the array represent values
146 * of variables to be correlated.
147 *
148 * @param matrix matrix with columns representing variables to correlate
149 * @return correlation matrix
150 */
151 public RealMatrix computeCorrelationMatrix(final double[][] matrix) {
152 return computeCorrelationMatrix(new BlockRealMatrix(matrix));
153 }
154
155 /**
156 * Computes the Spearman's rank correlation coefficient between the two arrays.
157 *
158 * @param xArray first data array
159 * @param yArray second data array
160 * @return Returns Spearman's rank correlation coefficient for the two arrays
161 * @throws DimensionMismatchException if the arrays lengths do not match
162 * @throws MathIllegalArgumentException if the array length is less than 2
163 */
164 public double correlation(final double[] xArray, final double[] yArray) {
165 if (xArray.length != yArray.length) {
166 throw new DimensionMismatchException(xArray.length, yArray.length);
167 } else if (xArray.length < 2) {
168 throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION,
169 xArray.length, 2);
170 } else {
171 double[] x = xArray;
172 double[] y = yArray;
173 if (rankingAlgorithm instanceof NaturalRanking &&
174 NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
175 final Set<Integer> nanPositions = new HashSet<Integer>();
176
177 nanPositions.addAll(getNaNPositions(xArray));
178 nanPositions.addAll(getNaNPositions(yArray));
179
180 x = removeValues(xArray, nanPositions);
181 y = removeValues(yArray, nanPositions);
182 }
183 return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(x), rankingAlgorithm.rank(y));
184 }
185 }
186
187 /**
188 * Applies rank transform to each of the columns of <code>matrix</code>
189 * using the current <code>rankingAlgorithm</code>.
190 *
191 * @param matrix matrix to transform
192 * @return a rank-transformed matrix
193 */
194 private RealMatrix rankTransform(final RealMatrix matrix) {
195 RealMatrix transformed = null;
196
197 if (rankingAlgorithm instanceof NaturalRanking &&
198 ((NaturalRanking) rankingAlgorithm).getNanStrategy() == NaNStrategy.REMOVED) {
199 final Set<Integer> nanPositions = new HashSet<Integer>();
200 for (int i = 0; i < matrix.getColumnDimension(); i++) {
201 nanPositions.addAll(getNaNPositions(matrix.getColumn(i)));
202 }
203
204 // if we have found NaN values, we have to update the matrix size
205 if (!nanPositions.isEmpty()) {
206 transformed = new BlockRealMatrix(matrix.getRowDimension() - nanPositions.size(),
207 matrix.getColumnDimension());
208 for (int i = 0; i < transformed.getColumnDimension(); i++) {
209 transformed.setColumn(i, removeValues(matrix.getColumn(i), nanPositions));
210 }
211 }
212 }
213
214 if (transformed == null) {
215 transformed = matrix.copy();
216 }
217
218 for (int i = 0; i < transformed.getColumnDimension(); i++) {
219 transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i)));
220 }
221
222 return transformed;
223 }
224
225 /**
226 * Returns a list containing the indices of NaN values in the input array.
227 *
228 * @param input the input array
229 * @return a list of NaN positions in the input array
230 */
231 private List<Integer> getNaNPositions(final double[] input) {
232 final List<Integer> positions = new ArrayList<Integer>();
233 for (int i = 0; i < input.length; i++) {
234 if (Double.isNaN(input[i])) {
235 positions.add(i);
236 }
237 }
238 return positions;
239 }
240
241 /**
242 * Removes all values from the input array at the specified indices.
243 *
244 * @param input the input array
245 * @param indices a set containing the indices to be removed
246 * @return the input array without the values at the specified indices
247 */
248 private double[] removeValues(final double[] input, final Set<Integer> indices) {
249 if (indices.isEmpty()) {
250 return input;
251 }
252 final double[] result = new double[input.length - indices.size()];
253 for (int i = 0, j = 0; i < input.length; i++) {
254 if (!indices.contains(i)) {
255 result[j++] = input[i];
256 }
257 }
258 return result;
259 }
260 }