001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math4.legacy.stat.correlation;
019
020import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
021import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
022import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
023import org.apache.commons.math4.legacy.linear.BlockRealMatrix;
024import org.apache.commons.math4.legacy.linear.RealMatrix;
025import org.apache.commons.math4.legacy.stat.ranking.NaNStrategy;
026import org.apache.commons.math4.legacy.stat.ranking.NaturalRanking;
027import org.apache.commons.math4.legacy.stat.ranking.RankingAlgorithm;
028
029/**
030 * Spearman's rank correlation. This implementation performs a rank
031 * transformation on the input data and then computes {@link PearsonsCorrelation}
032 * on the ranked data.
033 * <p>
034 * By default, ranks are computed using {@link NaturalRanking} with default
035 * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
036 * The ranking algorithm can be set using a constructor argument.
037 *
038 * @since 2.0
039 */
040public class SpearmansCorrelation {
041
042    /** Input data. */
043    private final RealMatrix data;
044
045    /** Ranking algorithm.  */
046    private final RankingAlgorithm rankingAlgorithm;
047
048    /** Rank correlation. */
049    private final PearsonsCorrelation rankCorrelation;
050
051    /**
052     * Create a SpearmansCorrelation without data.
053     */
054    public SpearmansCorrelation() {
055        this(new NaturalRanking());
056    }
057
058    /**
059     * Create a SpearmansCorrelation with the given ranking algorithm.
060     *
061     * @param rankingAlgorithm ranking algorithm
062     * @throws MathIllegalArgumentException if the provided {@link RankingAlgorithm} is of
063     * type {@link NaturalRanking} and uses a {@link NaNStrategy#REMOVED} strategy
064     * @since 3.1
065     */
066    public SpearmansCorrelation(final RankingAlgorithm rankingAlgorithm)
067        throws MathIllegalArgumentException {
068
069        if (rankingAlgorithm instanceof NaturalRanking &&
070            NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
071            throw new MathIllegalArgumentException(LocalizedFormats.NOT_SUPPORTED_NAN_STRATEGY,
072                                                   NaNStrategy.REMOVED);
073        }
074
075        data = null;
076        this.rankingAlgorithm = rankingAlgorithm;
077        rankCorrelation = null;
078    }
079
080    /**
081     * Create a SpearmansCorrelation from the given data matrix.
082     *
083     * @param dataMatrix matrix of data with columns representing
084     * variables to correlate
085     */
086    public SpearmansCorrelation(final RealMatrix dataMatrix) {
087        this(dataMatrix, new NaturalRanking());
088    }
089
090    /**
091     * Create a SpearmansCorrelation with the given input data matrix
092     * and ranking algorithm.
093     *
094     * @param dataMatrix matrix of data with columns representing
095     * variables to correlate
096     * @param rankingAlgorithm ranking algorithm
097     * @throws MathIllegalArgumentException if the provided {@link RankingAlgorithm} is of
098     * type {@link NaturalRanking} and uses a {@link NaNStrategy#REMOVED} strategy
099     */
100    public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm)
101        throws MathIllegalArgumentException {
102
103        if (rankingAlgorithm instanceof NaturalRanking &&
104            NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
105            throw new MathIllegalArgumentException(LocalizedFormats.NOT_SUPPORTED_NAN_STRATEGY,
106                                                   NaNStrategy.REMOVED);
107        }
108
109        this.rankingAlgorithm = rankingAlgorithm;
110        this.data = rankTransform(dataMatrix);
111        rankCorrelation = new PearsonsCorrelation(data);
112    }
113
114    /**
115     * Calculate the Spearman Rank Correlation Matrix.
116     *
117     * @return Spearman Rank Correlation Matrix
118     * @throws NullPointerException if this instance was created with no data.
119     */
120    public RealMatrix getCorrelationMatrix() {
121        return rankCorrelation.getCorrelationMatrix();
122    }
123
124    /**
125     * Returns a {@link PearsonsCorrelation} instance constructed from the
126     * ranked input data. That is,
127     * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
128     * is equivalent to
129     * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
130     * <code>rankTransform(matrix)</code> is the result of applying the
131     * configured <code>RankingAlgorithm</code> to each of the columns of
132     * <code>matrix.</code>
133     *
134     * <p>Returns null if this instance was created with no data.</p>
135     *
136     * @return PearsonsCorrelation among ranked column data
137     */
138    public PearsonsCorrelation getRankCorrelation() {
139        return rankCorrelation;
140    }
141
142    /**
143     * Computes the Spearman's rank correlation matrix for the columns of the
144     * input matrix.
145     *
146     * @param matrix matrix with columns representing variables to correlate
147     * @return correlation matrix
148     */
149    public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
150        final RealMatrix matrixCopy = rankTransform(matrix);
151        return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
152    }
153
154    /**
155     * Computes the Spearman's rank correlation matrix for the columns of the
156     * input rectangular array.  The columns of the array represent values
157     * of variables to be correlated.
158     *
159     * @param matrix matrix with columns representing variables to correlate
160     * @return correlation matrix
161     */
162    public RealMatrix computeCorrelationMatrix(final double[][] matrix) {
163       return computeCorrelationMatrix(new BlockRealMatrix(matrix));
164    }
165
166    /**
167     * Computes the Spearman's rank correlation coefficient between the two arrays.
168     *
169     * @param xArray first data array
170     * @param yArray second data array
171     * @return Returns Spearman's rank correlation coefficient for the two arrays
172     * @throws DimensionMismatchException if the arrays lengths do not match
173     * @throws MathIllegalArgumentException if the array length is less than 2
174     */
175    public double correlation(final double[] xArray, final double[] yArray) {
176        if (xArray.length != yArray.length) {
177            throw new DimensionMismatchException(xArray.length, yArray.length);
178        } else if (xArray.length < 2) {
179            throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION,
180                                                   xArray.length, 2);
181        } else {
182            return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
183                                                         rankingAlgorithm.rank(yArray));
184        }
185    }
186
187    /**
188     * Applies rank transform to each of the columns of <code>matrix</code>
189     * using the current <code>rankingAlgorithm</code>.
190     *
191     * @param matrix matrix to transform
192     * @return a rank-transformed matrix
193     */
194    private RealMatrix rankTransform(final RealMatrix matrix) {
195        RealMatrix transformed = matrix.copy();
196        for (int i = 0; i < transformed.getColumnDimension(); i++) {
197            transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i)));
198        }
199
200        return transformed;
201    }
202}