001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.math4.legacy.stat.correlation; 019 020import org.apache.commons.math4.legacy.exception.DimensionMismatchException; 021import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException; 022import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 023import org.apache.commons.math4.legacy.linear.BlockRealMatrix; 024import org.apache.commons.math4.legacy.linear.RealMatrix; 025import org.apache.commons.math4.legacy.stat.ranking.NaNStrategy; 026import org.apache.commons.math4.legacy.stat.ranking.NaturalRanking; 027import org.apache.commons.math4.legacy.stat.ranking.RankingAlgorithm; 028 029/** 030 * Spearman's rank correlation. This implementation performs a rank 031 * transformation on the input data and then computes {@link PearsonsCorrelation} 032 * on the ranked data. 033 * <p> 034 * By default, ranks are computed using {@link NaturalRanking} with default 035 * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged). 036 * The ranking algorithm can be set using a constructor argument. 037 * 038 * @since 2.0 039 */ 040public class SpearmansCorrelation { 041 042 /** Input data. */ 043 private final RealMatrix data; 044 045 /** Ranking algorithm. */ 046 private final RankingAlgorithm rankingAlgorithm; 047 048 /** Rank correlation. */ 049 private final PearsonsCorrelation rankCorrelation; 050 051 /** 052 * Create a SpearmansCorrelation without data. 053 */ 054 public SpearmansCorrelation() { 055 this(new NaturalRanking()); 056 } 057 058 /** 059 * Create a SpearmansCorrelation with the given ranking algorithm. 060 * 061 * @param rankingAlgorithm ranking algorithm 062 * @throws MathIllegalArgumentException if the provided {@link RankingAlgorithm} is of 063 * type {@link NaturalRanking} and uses a {@link NaNStrategy#REMOVED} strategy 064 * @since 3.1 065 */ 066 public SpearmansCorrelation(final RankingAlgorithm rankingAlgorithm) 067 throws MathIllegalArgumentException { 068 069 if (rankingAlgorithm instanceof NaturalRanking && 070 NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) { 071 throw new MathIllegalArgumentException(LocalizedFormats.NOT_SUPPORTED_NAN_STRATEGY, 072 NaNStrategy.REMOVED); 073 } 074 075 data = null; 076 this.rankingAlgorithm = rankingAlgorithm; 077 rankCorrelation = null; 078 } 079 080 /** 081 * Create a SpearmansCorrelation from the given data matrix. 082 * 083 * @param dataMatrix matrix of data with columns representing 084 * variables to correlate 085 */ 086 public SpearmansCorrelation(final RealMatrix dataMatrix) { 087 this(dataMatrix, new NaturalRanking()); 088 } 089 090 /** 091 * Create a SpearmansCorrelation with the given input data matrix 092 * and ranking algorithm. 093 * 094 * @param dataMatrix matrix of data with columns representing 095 * variables to correlate 096 * @param rankingAlgorithm ranking algorithm 097 * @throws MathIllegalArgumentException if the provided {@link RankingAlgorithm} is of 098 * type {@link NaturalRanking} and uses a {@link NaNStrategy#REMOVED} strategy 099 */ 100 public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) 101 throws MathIllegalArgumentException { 102 103 if (rankingAlgorithm instanceof NaturalRanking && 104 NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) { 105 throw new MathIllegalArgumentException(LocalizedFormats.NOT_SUPPORTED_NAN_STRATEGY, 106 NaNStrategy.REMOVED); 107 } 108 109 this.rankingAlgorithm = rankingAlgorithm; 110 this.data = rankTransform(dataMatrix); 111 rankCorrelation = new PearsonsCorrelation(data); 112 } 113 114 /** 115 * Calculate the Spearman Rank Correlation Matrix. 116 * 117 * @return Spearman Rank Correlation Matrix 118 * @throws NullPointerException if this instance was created with no data. 119 */ 120 public RealMatrix getCorrelationMatrix() { 121 return rankCorrelation.getCorrelationMatrix(); 122 } 123 124 /** 125 * Returns a {@link PearsonsCorrelation} instance constructed from the 126 * ranked input data. That is, 127 * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code> 128 * is equivalent to 129 * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where 130 * <code>rankTransform(matrix)</code> is the result of applying the 131 * configured <code>RankingAlgorithm</code> to each of the columns of 132 * <code>matrix.</code> 133 * 134 * <p>Returns null if this instance was created with no data.</p> 135 * 136 * @return PearsonsCorrelation among ranked column data 137 */ 138 public PearsonsCorrelation getRankCorrelation() { 139 return rankCorrelation; 140 } 141 142 /** 143 * Computes the Spearman's rank correlation matrix for the columns of the 144 * input matrix. 145 * 146 * @param matrix matrix with columns representing variables to correlate 147 * @return correlation matrix 148 */ 149 public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) { 150 final RealMatrix matrixCopy = rankTransform(matrix); 151 return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy); 152 } 153 154 /** 155 * Computes the Spearman's rank correlation matrix for the columns of the 156 * input rectangular array. The columns of the array represent values 157 * of variables to be correlated. 158 * 159 * @param matrix matrix with columns representing variables to correlate 160 * @return correlation matrix 161 */ 162 public RealMatrix computeCorrelationMatrix(final double[][] matrix) { 163 return computeCorrelationMatrix(new BlockRealMatrix(matrix)); 164 } 165 166 /** 167 * Computes the Spearman's rank correlation coefficient between the two arrays. 168 * 169 * @param xArray first data array 170 * @param yArray second data array 171 * @return Returns Spearman's rank correlation coefficient for the two arrays 172 * @throws DimensionMismatchException if the arrays lengths do not match 173 * @throws MathIllegalArgumentException if the array length is less than 2 174 */ 175 public double correlation(final double[] xArray, final double[] yArray) { 176 if (xArray.length != yArray.length) { 177 throw new DimensionMismatchException(xArray.length, yArray.length); 178 } else if (xArray.length < 2) { 179 throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION, 180 xArray.length, 2); 181 } else { 182 return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray), 183 rankingAlgorithm.rank(yArray)); 184 } 185 } 186 187 /** 188 * Applies rank transform to each of the columns of <code>matrix</code> 189 * using the current <code>rankingAlgorithm</code>. 190 * 191 * @param matrix matrix to transform 192 * @return a rank-transformed matrix 193 */ 194 private RealMatrix rankTransform(final RealMatrix matrix) { 195 RealMatrix transformed = matrix.copy(); 196 for (int i = 0; i < transformed.getColumnDimension(); i++) { 197 transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i))); 198 } 199 200 return transformed; 201 } 202}