SpearmansCorrelation.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.math4.legacy.stat.correlation;

  18. import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
  19. import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
  20. import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
  21. import org.apache.commons.math4.legacy.linear.BlockRealMatrix;
  22. import org.apache.commons.math4.legacy.linear.RealMatrix;
  23. import org.apache.commons.math4.legacy.stat.ranking.NaNStrategy;
  24. import org.apache.commons.math4.legacy.stat.ranking.NaturalRanking;
  25. import org.apache.commons.math4.legacy.stat.ranking.RankingAlgorithm;

  26. /**
  27.  * Spearman's rank correlation. This implementation performs a rank
  28.  * transformation on the input data and then computes {@link PearsonsCorrelation}
  29.  * on the ranked data.
  30.  * <p>
  31.  * By default, ranks are computed using {@link NaturalRanking} with default
  32.  * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
  33.  * The ranking algorithm can be set using a constructor argument.
  34.  *
  35.  * @since 2.0
  36.  */
  37. public class SpearmansCorrelation {

  38.     /** Input data. */
  39.     private final RealMatrix data;

  40.     /** Ranking algorithm.  */
  41.     private final RankingAlgorithm rankingAlgorithm;

  42.     /** Rank correlation. */
  43.     private final PearsonsCorrelation rankCorrelation;

  44.     /**
  45.      * Create a SpearmansCorrelation without data.
  46.      */
  47.     public SpearmansCorrelation() {
  48.         this(new NaturalRanking());
  49.     }

  50.     /**
  51.      * Create a SpearmansCorrelation with the given ranking algorithm.
  52.      *
  53.      * @param rankingAlgorithm ranking algorithm
  54.      * @throws MathIllegalArgumentException if the provided {@link RankingAlgorithm} is of
  55.      * type {@link NaturalRanking} and uses a {@link NaNStrategy#REMOVED} strategy
  56.      * @since 3.1
  57.      */
  58.     public SpearmansCorrelation(final RankingAlgorithm rankingAlgorithm)
  59.         throws MathIllegalArgumentException {

  60.         if (rankingAlgorithm instanceof NaturalRanking &&
  61.             NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
  62.             throw new MathIllegalArgumentException(LocalizedFormats.NOT_SUPPORTED_NAN_STRATEGY,
  63.                                                    NaNStrategy.REMOVED);
  64.         }

  65.         data = null;
  66.         this.rankingAlgorithm = rankingAlgorithm;
  67.         rankCorrelation = null;
  68.     }

  69.     /**
  70.      * Create a SpearmansCorrelation from the given data matrix.
  71.      *
  72.      * @param dataMatrix matrix of data with columns representing
  73.      * variables to correlate
  74.      */
  75.     public SpearmansCorrelation(final RealMatrix dataMatrix) {
  76.         this(dataMatrix, new NaturalRanking());
  77.     }

  78.     /**
  79.      * Create a SpearmansCorrelation with the given input data matrix
  80.      * and ranking algorithm.
  81.      *
  82.      * @param dataMatrix matrix of data with columns representing
  83.      * variables to correlate
  84.      * @param rankingAlgorithm ranking algorithm
  85.      * @throws MathIllegalArgumentException if the provided {@link RankingAlgorithm} is of
  86.      * type {@link NaturalRanking} and uses a {@link NaNStrategy#REMOVED} strategy
  87.      */
  88.     public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm)
  89.         throws MathIllegalArgumentException {

  90.         if (rankingAlgorithm instanceof NaturalRanking &&
  91.             NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
  92.             throw new MathIllegalArgumentException(LocalizedFormats.NOT_SUPPORTED_NAN_STRATEGY,
  93.                                                    NaNStrategy.REMOVED);
  94.         }

  95.         this.rankingAlgorithm = rankingAlgorithm;
  96.         this.data = rankTransform(dataMatrix);
  97.         rankCorrelation = new PearsonsCorrelation(data);
  98.     }

  99.     /**
  100.      * Calculate the Spearman Rank Correlation Matrix.
  101.      *
  102.      * @return Spearman Rank Correlation Matrix
  103.      * @throws NullPointerException if this instance was created with no data.
  104.      */
  105.     public RealMatrix getCorrelationMatrix() {
  106.         return rankCorrelation.getCorrelationMatrix();
  107.     }

  108.     /**
  109.      * Returns a {@link PearsonsCorrelation} instance constructed from the
  110.      * ranked input data. That is,
  111.      * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
  112.      * is equivalent to
  113.      * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
  114.      * <code>rankTransform(matrix)</code> is the result of applying the
  115.      * configured <code>RankingAlgorithm</code> to each of the columns of
  116.      * <code>matrix.</code>
  117.      *
  118.      * <p>Returns null if this instance was created with no data.</p>
  119.      *
  120.      * @return PearsonsCorrelation among ranked column data
  121.      */
  122.     public PearsonsCorrelation getRankCorrelation() {
  123.         return rankCorrelation;
  124.     }

  125.     /**
  126.      * Computes the Spearman's rank correlation matrix for the columns of the
  127.      * input matrix.
  128.      *
  129.      * @param matrix matrix with columns representing variables to correlate
  130.      * @return correlation matrix
  131.      */
  132.     public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
  133.         final RealMatrix matrixCopy = rankTransform(matrix);
  134.         return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
  135.     }

  136.     /**
  137.      * Computes the Spearman's rank correlation matrix for the columns of the
  138.      * input rectangular array.  The columns of the array represent values
  139.      * of variables to be correlated.
  140.      *
  141.      * @param matrix matrix with columns representing variables to correlate
  142.      * @return correlation matrix
  143.      */
  144.     public RealMatrix computeCorrelationMatrix(final double[][] matrix) {
  145.        return computeCorrelationMatrix(new BlockRealMatrix(matrix));
  146.     }

  147.     /**
  148.      * Computes the Spearman's rank correlation coefficient between the two arrays.
  149.      *
  150.      * @param xArray first data array
  151.      * @param yArray second data array
  152.      * @return Returns Spearman's rank correlation coefficient for the two arrays
  153.      * @throws DimensionMismatchException if the arrays lengths do not match
  154.      * @throws MathIllegalArgumentException if the array length is less than 2
  155.      */
  156.     public double correlation(final double[] xArray, final double[] yArray) {
  157.         if (xArray.length != yArray.length) {
  158.             throw new DimensionMismatchException(xArray.length, yArray.length);
  159.         } else if (xArray.length < 2) {
  160.             throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION,
  161.                                                    xArray.length, 2);
  162.         } else {
  163.             return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
  164.                                                          rankingAlgorithm.rank(yArray));
  165.         }
  166.     }

  167.     /**
  168.      * Applies rank transform to each of the columns of <code>matrix</code>
  169.      * using the current <code>rankingAlgorithm</code>.
  170.      *
  171.      * @param matrix matrix to transform
  172.      * @return a rank-transformed matrix
  173.      */
  174.     private RealMatrix rankTransform(final RealMatrix matrix) {
  175.         RealMatrix transformed = matrix.copy();
  176.         for (int i = 0; i < transformed.getColumnDimension(); i++) {
  177.             transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i)));
  178.         }

  179.         return transformed;
  180.     }
  181. }