StorelessBivariateCovariance.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.math4.legacy.stat.correlation;

  18. import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
  19. import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;

  20. /**
  21.  * Bivariate Covariance implementation that does not require input data to be
  22.  * stored in memory.
  23.  *
  24.  * <p>This class is based on a paper written by Philippe P&eacute;bay:
  25.  * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
  26.  * Formulas for Robust, One-Pass Parallel Computation of Covariances and
  27.  * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
  28.  * Sandia National Laboratories. It computes the covariance for a pair of variables.
  29.  * Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p>
  30.  *
  31.  * <p>Note: This class is package private as it is only used internally in
  32.  * the {@link StorelessCovariance} class.</p>
  33.  *
  34.  * @since 3.0
  35.  */
  36. class StorelessBivariateCovariance {

  37.     /** the mean of variable x. */
  38.     private double meanX;

  39.     /** the mean of variable y. */
  40.     private double meanY;

  41.     /** number of observations. */
  42.     private double n;

  43.     /** the running covariance estimate. */
  44.     private double covarianceNumerator;

  45.     /** flag for bias correction. */
  46.     private boolean biasCorrected;

  47.     /**
  48.      * Create an empty {@link StorelessBivariateCovariance} instance with
  49.      * bias correction.
  50.      */
  51.     StorelessBivariateCovariance() {
  52.         this(true);
  53.     }

  54.     /**
  55.      * Create an empty {@link StorelessBivariateCovariance} instance.
  56.      *
  57.      * @param biasCorrection if <code>true</code> the covariance estimate is corrected
  58.      * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
  59.      * i.e. n in the denominator.
  60.      */
  61.     StorelessBivariateCovariance(final boolean biasCorrection) {
  62.         meanX = meanY = 0.0;
  63.         n = 0;
  64.         covarianceNumerator = 0.0;
  65.         biasCorrected = biasCorrection;
  66.     }

  67.     /**
  68.      * Update the covariance estimation with a pair of variables (x, y).
  69.      *
  70.      * @param x the x value
  71.      * @param y the y value
  72.      */
  73.     public void increment(final double x, final double y) {
  74.         n++;
  75.         final double deltaX = x - meanX;
  76.         final double deltaY = y - meanY;
  77.         meanX += deltaX / n;
  78.         meanY += deltaY / n;
  79.         covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY;
  80.     }

  81.     /**
  82.      * Appends another bivariate covariance calculation to this.
  83.      * After this operation, statistics returned should be close to what would
  84.      * have been obtained by by performing all of the {@link #increment(double, double)}
  85.      * operations in {@code cov} directly on this.
  86.      *
  87.      * @param cov StorelessBivariateCovariance instance to append.
  88.      */
  89.     public void append(StorelessBivariateCovariance cov) {
  90.         double oldN = n;
  91.         n += cov.n;
  92.         final double deltaX = cov.meanX - meanX;
  93.         final double deltaY = cov.meanY - meanY;
  94.         meanX += deltaX * cov.n / n;
  95.         meanY += deltaY * cov.n / n;
  96.         covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY;
  97.     }

  98.     /**
  99.      * Returns the number of observations.
  100.      *
  101.      * @return number of observations
  102.      */
  103.     public double getN() {
  104.         return n;
  105.     }

  106.     /**
  107.      * Return the current covariance estimate.
  108.      *
  109.      * @return the current covariance
  110.      * @throws NumberIsTooSmallException if the number of observations
  111.      * is &lt; 2
  112.      */
  113.     public double getResult() throws NumberIsTooSmallException {
  114.         if (n < 2) {
  115.             throw new NumberIsTooSmallException(LocalizedFormats.INSUFFICIENT_DIMENSION,
  116.                                                 n, 2, true);
  117.         }
  118.         if (biasCorrected) {
  119.             return covarianceNumerator / (n - 1d);
  120.         } else {
  121.             return covarianceNumerator / n;
  122.         }
  123.     }
  124. }