TTest.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.math4.legacy.stat.inference;

  18. import org.apache.commons.statistics.distribution.TDistribution;
  19. import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
  20. import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
  21. import org.apache.commons.math4.legacy.exception.MaxCountExceededException;
  22. import org.apache.commons.math4.legacy.exception.NoDataException;
  23. import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
  24. import org.apache.commons.math4.legacy.exception.NullArgumentException;
  25. import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
  26. import org.apache.commons.math4.legacy.exception.OutOfRangeException;
  27. import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
  28. import org.apache.commons.math4.legacy.stat.StatUtils;
  29. import org.apache.commons.math4.legacy.stat.descriptive.StatisticalSummary;
  30. import org.apache.commons.math4.core.jdkmath.JdkMath;

  31. /**
  32.  * An implementation for Student's t-tests.
  33.  * <p>
  34.  * Tests can be:<ul>
  35.  * <li>One-sample or two-sample</li>
  36.  * <li>One-sided or two-sided</li>
  37.  * <li>Paired or unpaired (for two-sample tests)</li>
  38.  * <li>Homoscedastic (equal variance assumption) or heteroscedastic
  39.  * (for two sample tests)</li>
  40.  * <li>Fixed significance level (boolean-valued) or returning p-values.
  41.  * </li></ul>
  42.  * <p>
 * Test statistics are available for all tests.  Methods including "Test" in
 * their names perform tests, all other methods return t-statistics.  Among
  45.  * the "Test" methods, <code>double-</code>valued methods return p-values;
  46.  * <code>boolean-</code>valued methods perform fixed significance level tests.
  47.  * Significance levels are always specified as numbers between 0 and 0.5
  48.  * (e.g. tests at the 95% level  use <code>alpha=0.05</code>).</p>
  49.  * <p>
  50.  * Input to tests can be either <code>double[]</code> arrays or
  51.  * {@link StatisticalSummary} instances.</p><p>
 * Uses the Commons Statistics {@link org.apache.commons.statistics.distribution.TDistribution}
  53.  * implementation to estimate exact p-values.</p>
  54.  *
  55.  */
  56. public class TTest {
  57.     /**
  58.      * Computes a paired, 2-sample t-statistic based on the data in the input
  59.      * arrays.  The t-statistic returned is equivalent to what would be returned by
  60.      * computing the one-sample t-statistic {@link #t(double, double[])}, with
  61.      * <code>mu = 0</code> and the sample array consisting of the (signed)
  62.      * differences between corresponding entries in <code>sample1</code> and
  63.      * <code>sample2.</code>
  64.      * <p>
  65.      * <strong>Preconditions</strong>: <ul>
  66.      * <li>The input arrays must have the same length and their common length
  67.      * must be at least 2.
  68.      * </li></ul>
  69.      *
  70.      * @param sample1 array of sample data values
  71.      * @param sample2 array of sample data values
  72.      * @return t statistic
  73.      * @throws NullArgumentException if the arrays are <code>null</code>
  74.      * @throws NoDataException if the arrays are empty
  75.      * @throws DimensionMismatchException if the length of the arrays is not equal
  76.      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
  77.      */
  78.     public double pairedT(final double[] sample1, final double[] sample2)
  79.         throws NullArgumentException, NoDataException,
  80.         DimensionMismatchException, NumberIsTooSmallException {

  81.         checkSampleData(sample1);
  82.         checkSampleData(sample2);
  83.         double meanDifference = StatUtils.meanDifference(sample1, sample2);
  84.         return t(meanDifference, 0,
  85.                  StatUtils.varianceDifference(sample1, sample2, meanDifference),
  86.                  sample1.length);
  87.     }

  88.     /**
  89.      * Returns the <i>observed significance level</i>, or
  90.      * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
  91.      * based on the data in the input arrays.
  92.      * <p>
  93.      * The number returned is the smallest significance level
  94.      * at which one can reject the null hypothesis that the mean of the paired
  95.      * differences is 0 in favor of the two-sided alternative that the mean paired
  96.      * difference is not equal to 0. For a one-sided test, divide the returned
  97.      * value by 2.</p>
  98.      * <p>
  99.      * This test is equivalent to a one-sample t-test computed using
  100.      * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
  101.      * array consisting of the signed differences between corresponding elements of
  102.      * <code>sample1</code> and <code>sample2.</code></p>
  103.      * <p>
  104.      * <strong>Usage Note:</strong><br>
  105.      * The validity of the p-value depends on the assumptions of the parametric
  106.      * t-test procedure, as discussed
  107.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
  108.      * here</a></p>
  109.      * <p>
  110.      * <strong>Preconditions</strong>: <ul>
  111.      * <li>The input array lengths must be the same and their common length must
  112.      * be at least 2.
  113.      * </li></ul>
  114.      *
  115.      * @param sample1 array of sample data values
  116.      * @param sample2 array of sample data values
  117.      * @return p-value for t-test
  118.      * @throws NullArgumentException if the arrays are <code>null</code>
  119.      * @throws NoDataException if the arrays are empty
  120.      * @throws DimensionMismatchException if the length of the arrays is not equal
  121.      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
  122.      * @throws MaxCountExceededException if an error occurs computing the p-value
  123.      */
  124.     public double pairedTTest(final double[] sample1, final double[] sample2)
  125.         throws NullArgumentException, NoDataException, DimensionMismatchException,
  126.         NumberIsTooSmallException, MaxCountExceededException {

  127.         double meanDifference = StatUtils.meanDifference(sample1, sample2);
  128.         return tTest(meanDifference, 0,
  129.                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
  130.                 sample1.length);
  131.     }

  132.     /**
  133.      * Performs a paired t-test evaluating the null hypothesis that the
  134.      * mean of the paired differences between <code>sample1</code> and
  135.      * <code>sample2</code> is 0 in favor of the two-sided alternative that the
  136.      * mean paired difference is not equal to 0, with significance level
  137.      * <code>alpha</code>.
  138.      * <p>
  139.      * Returns <code>true</code> iff the null hypothesis can be rejected with
  140.      * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
  141.      * <code>alpha * 2</code></p>
  142.      * <p>
  143.      * <strong>Usage Note:</strong><br>
  144.      * The validity of the test depends on the assumptions of the parametric
  145.      * t-test procedure, as discussed
  146.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
  147.      * here</a></p>
  148.      * <p>
  149.      * <strong>Preconditions</strong>: <ul>
  150.      * <li>The input array lengths must be the same and their common length
  151.      * must be at least 2.
  152.      * </li>
     * <li> <code> 0 &lt; alpha &le; 0.5 </code>
  154.      * </li></ul>
  155.      *
  156.      * @param sample1 array of sample data values
  157.      * @param sample2 array of sample data values
  158.      * @param alpha significance level of the test
  159.      * @return true if the null hypothesis can be rejected with
  160.      * confidence 1 - alpha
  161.      * @throws NullArgumentException if the arrays are <code>null</code>
  162.      * @throws NoDataException if the arrays are empty
  163.      * @throws DimensionMismatchException if the length of the arrays is not equal
  164.      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
  165.      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
  166.      * @throws MaxCountExceededException if an error occurs computing the p-value
  167.      */
  168.     public boolean pairedTTest(final double[] sample1, final double[] sample2,
  169.                                final double alpha)
  170.         throws NullArgumentException, NoDataException, DimensionMismatchException,
  171.         NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {

  172.         checkSignificanceLevel(alpha);
  173.         return pairedTTest(sample1, sample2) < alpha;
  174.     }

  175.     /**
  176.      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
  177.      * t statistic </a> given observed values and a comparison constant.
  178.      * <p>
  179.      * This statistic can be used to perform a one sample t-test for the mean.
  180.      * </p><p>
  181.      * <strong>Preconditions</strong>: <ul>
  182.      * <li>The observed array length must be at least 2.
  183.      * </li></ul>
  184.      *
  185.      * @param mu comparison constant
  186.      * @param observed array of values
  187.      * @return t statistic
  188.      * @throws NullArgumentException if <code>observed</code> is <code>null</code>
  189.      * @throws NumberIsTooSmallException if the length of <code>observed</code> is &lt; 2
  190.      */
  191.     public double t(final double mu, final double[] observed)
  192.         throws NullArgumentException, NumberIsTooSmallException {

  193.         checkSampleData(observed);
  194.         // No try-catch or advertised exception because args have just been checked
  195.         return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
  196.                 observed.length);
  197.     }

  198.     /**
  199.      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
  200.      * t statistic </a> to use in comparing the mean of the dataset described by
  201.      * <code>sampleStats</code> to <code>mu</code>.
  202.      * <p>
  203.      * This statistic can be used to perform a one sample t-test for the mean.
  204.      * </p><p>
  205.      * <strong>Preconditions</strong>: <ul>
     * <li><code>sampleStats.getN() &ge; 2</code>.
  207.      * </li></ul>
  208.      *
  209.      * @param mu comparison constant
     * @param sampleStats StatisticalSummary holding sample summary statistics
  211.      * @return t statistic
  212.      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
  213.      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
  214.      */
  215.     public double t(final double mu, final StatisticalSummary sampleStats)
  216.         throws NullArgumentException, NumberIsTooSmallException {

  217.         checkSampleData(sampleStats);
  218.         return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
  219.                  sampleStats.getN());
  220.     }

  221.     /**
  222.      * Computes a 2-sample t statistic,  under the hypothesis of equal
  223.      * subpopulation variances.  To compute a t-statistic without the
  224.      * equal variances hypothesis, use {@link #t(double[], double[])}.
  225.      * <p>
  226.      * This statistic can be used to perform a (homoscedastic) two-sample
  227.      * t-test to compare sample means.</p>
  228.      * <p>
  229.      * The t-statistic is</p>
  230.      * <p>
  231.      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
  232.      * </p><p>
  233.      * where <strong><code>n1</code></strong> is the size of first sample;
  234.      * <strong><code> n2</code></strong> is the size of second sample;
  235.      * <strong><code> m1</code></strong> is the mean of first sample;
  236.      * <strong><code> m2</code></strong> is the mean of second sample
  237.      * and <strong><code>var</code></strong> is the pooled variance estimate:
  238.      * </p><p>
     * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
  240.      * </p><p>
  241.      * with <strong><code>var1</code></strong> the variance of the first sample and
  242.      * <strong><code>var2</code></strong> the variance of the second sample.
  243.      * </p><p>
  244.      * <strong>Preconditions</strong>: <ul>
  245.      * <li>The observed array lengths must both be at least 2.
  246.      * </li></ul>
  247.      *
  248.      * @param sample1 array of sample data values
  249.      * @param sample2 array of sample data values
  250.      * @return t statistic
  251.      * @throws NullArgumentException if the arrays are <code>null</code>
  252.      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
  253.      */
  254.     public double homoscedasticT(final double[] sample1, final double[] sample2)
  255.         throws NullArgumentException, NumberIsTooSmallException {

  256.         checkSampleData(sample1);
  257.         checkSampleData(sample2);
  258.         // No try-catch or advertised exception because args have just been checked
  259.         return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
  260.                               StatUtils.variance(sample1), StatUtils.variance(sample2),
  261.                               sample1.length, sample2.length);
  262.     }

  263.     /**
  264.      * Computes a 2-sample t statistic, without the hypothesis of equal
  265.      * subpopulation variances.  To compute a t-statistic assuming equal
  266.      * variances, use {@link #homoscedasticT(double[], double[])}.
  267.      * <p>
  268.      * This statistic can be used to perform a two-sample t-test to compare
  269.      * sample means.</p>
  270.      * <p>
  271.      * The t-statistic is</p>
  272.      * <p>
  273.      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
  274.      * </p><p>
     * where <strong><code>n1</code></strong> is the size of the first sample;
  276.      * <strong><code> n2</code></strong> is the size of the second sample;
  277.      * <strong><code> m1</code></strong> is the mean of the first sample;
  278.      * <strong><code> m2</code></strong> is the mean of the second sample;
  279.      * <strong><code> var1</code></strong> is the variance of the first sample;
  280.      * <strong><code> var2</code></strong> is the variance of the second sample;
  281.      * </p><p>
  282.      * <strong>Preconditions</strong>: <ul>
  283.      * <li>The observed array lengths must both be at least 2.
  284.      * </li></ul>
  285.      *
  286.      * @param sample1 array of sample data values
  287.      * @param sample2 array of sample data values
  288.      * @return t statistic
  289.      * @throws NullArgumentException if the arrays are <code>null</code>
  290.      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
  291.      */
  292.     public double t(final double[] sample1, final double[] sample2)
  293.         throws NullArgumentException, NumberIsTooSmallException {

  294.         checkSampleData(sample1);
  295.         checkSampleData(sample2);
  296.         // No try-catch or advertised exception because args have just been checked
  297.         return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
  298.                  StatUtils.variance(sample1), StatUtils.variance(sample2),
  299.                  sample1.length, sample2.length);
  300.     }

  301.     /**
  302.      * Computes a 2-sample t statistic, comparing the means of the datasets
  303.      * described by two {@link StatisticalSummary} instances, without the
  304.      * assumption of equal subpopulation variances.  Use
  305.      * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
  306.      * compute a t-statistic under the equal variances assumption.
  307.      * <p>
  308.      * This statistic can be used to perform a two-sample t-test to compare
  309.      * sample means.</p>
  310.      * <p>
  311.       * The returned  t-statistic is</p>
  312.      * <p>
  313.      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
  314.      * </p><p>
  315.      * where <strong><code>n1</code></strong> is the size of the first sample;
  316.      * <strong><code> n2</code></strong> is the size of the second sample;
  317.      * <strong><code> m1</code></strong> is the mean of the first sample;
     * <strong><code> m2</code></strong> is the mean of the second sample;
  319.      * <strong><code> var1</code></strong> is the variance of the first sample;
  320.      * <strong><code> var2</code></strong> is the variance of the second sample
  321.      * </p><p>
  322.      * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
  325.      * </li></ul>
  326.      *
  327.      * @param sampleStats1 StatisticalSummary describing data from the first sample
  328.      * @param sampleStats2 StatisticalSummary describing data from the second sample
  329.      * @return t statistic
  330.      * @throws NullArgumentException if the sample statistics are <code>null</code>
  331.      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
  332.      */
  333.     public double t(final StatisticalSummary sampleStats1,
  334.                     final StatisticalSummary sampleStats2)
  335.         throws NullArgumentException, NumberIsTooSmallException {

  336.         checkSampleData(sampleStats1);
  337.         checkSampleData(sampleStats2);
  338.         return t(sampleStats1.getMean(), sampleStats2.getMean(),
  339.                  sampleStats1.getVariance(), sampleStats2.getVariance(),
  340.                  sampleStats1.getN(), sampleStats2.getN());
  341.     }

  342.     /**
  343.      * Computes a 2-sample t statistic, comparing the means of the datasets
  344.      * described by two {@link StatisticalSummary} instances, under the
  345.      * assumption of equal subpopulation variances.  To compute a t-statistic
  346.      * without the equal variances assumption, use
  347.      * {@link #t(StatisticalSummary, StatisticalSummary)}.
  348.      * <p>
  349.      * This statistic can be used to perform a (homoscedastic) two-sample
  350.      * t-test to compare sample means.</p>
  351.      * <p>
  352.      * The t-statistic returned is</p>
  353.      * <p>
  354.      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
  355.      * </p><p>
  356.      * where <strong><code>n1</code></strong> is the size of first sample;
  357.      * <strong><code> n2</code></strong> is the size of second sample;
  358.      * <strong><code> m1</code></strong> is the mean of first sample;
  359.      * <strong><code> m2</code></strong> is the mean of second sample
  360.      * and <strong><code>var</code></strong> is the pooled variance estimate:
  361.      * </p><p>
     * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
  363.      * </p><p>
  364.      * with <strong><code>var1</code></strong> the variance of the first sample and
  365.      * <strong><code>var2</code></strong> the variance of the second sample.
  366.      * </p><p>
  367.      * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
  370.      * </li></ul>
  371.      *
  372.      * @param sampleStats1 StatisticalSummary describing data from the first sample
  373.      * @param sampleStats2 StatisticalSummary describing data from the second sample
  374.      * @return t statistic
  375.      * @throws NullArgumentException if the sample statistics are <code>null</code>
  376.      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
  377.      */
  378.     public double homoscedasticT(final StatisticalSummary sampleStats1,
  379.                                  final StatisticalSummary sampleStats2)
  380.         throws NullArgumentException, NumberIsTooSmallException {

  381.         checkSampleData(sampleStats1);
  382.         checkSampleData(sampleStats2);
  383.         return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
  384.                               sampleStats1.getVariance(), sampleStats2.getVariance(),
  385.                               sampleStats1.getN(), sampleStats2.getN());
  386.     }

  387.     /**
  388.      * Returns the <i>observed significance level</i>, or
  389.      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
  390.      * comparing the mean of the input array with the constant <code>mu</code>.
  391.      * <p>
  392.      * The number returned is the smallest significance level
  393.      * at which one can reject the null hypothesis that the mean equals
  394.      * <code>mu</code> in favor of the two-sided alternative that the mean
  395.      * is different from <code>mu</code>. For a one-sided test, divide the
  396.      * returned value by 2.</p>
  397.      * <p>
  398.      * <strong>Usage Note:</strong><br>
  399.      * The validity of the test depends on the assumptions of the parametric
  400.      * t-test procedure, as discussed
  401.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
  402.      * </p><p>
  403.      * <strong>Preconditions</strong>: <ul>
  404.      * <li>The observed array length must be at least 2.
  405.      * </li></ul>
  406.      *
  407.      * @param mu constant value to compare sample mean against
  408.      * @param sample array of sample data values
  409.      * @return p-value
  410.      * @throws NullArgumentException if the sample array is <code>null</code>
  411.      * @throws NumberIsTooSmallException if the length of the array is &lt; 2
  412.      * @throws MaxCountExceededException if an error occurs computing the p-value
  413.      */
  414.     public double tTest(final double mu, final double[] sample)
  415.         throws NullArgumentException, NumberIsTooSmallException,
  416.         MaxCountExceededException {

  417.         checkSampleData(sample);
  418.         // No try-catch or advertised exception because args have just been checked
  419.         return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
  420.                      sample.length);
  421.     }

  422.     /**
  423.      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
  424.      * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
  425.      * which <code>sample</code> is drawn equals <code>mu</code>.
  426.      * <p>
  427.      * Returns <code>true</code> iff the null hypothesis can be
  428.      * rejected with confidence <code>1 - alpha</code>.  To
  429.      * perform a 1-sided test, use <code>alpha * 2</code></p>
  430.      * <p>
  431.      * <strong>Examples:</strong><br><ol>
  432.      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
  433.      * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
  434.      * </li>
  435.      * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
  436.      * at the 99% level, first verify that the measured sample mean is less
  437.      * than <code>mu</code> and then use
  438.      * <br><code>tTest(mu, sample, 0.02) </code>
  439.      * </li></ol>
  440.      * <p>
  441.      * <strong>Usage Note:</strong><br>
  442.      * The validity of the test depends on the assumptions of the one-sample
  443.      * parametric t-test procedure, as discussed
  444.      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
  445.      * </p><p>
  446.      * <strong>Preconditions</strong>: <ul>
  447.      * <li>The observed array length must be at least 2.
  448.      * </li></ul>
  449.      *
  450.      * @param mu constant value to compare sample mean against
  451.      * @param sample array of sample data values
  452.      * @param alpha significance level of the test
     * @return true if the null hypothesis can be rejected with
     * confidence 1 - alpha
  454.      * @throws NullArgumentException if the sample array is <code>null</code>
  455.      * @throws NumberIsTooSmallException if the length of the array is &lt; 2
  456.      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
     * @throws MaxCountExceededException if an error occurs computing the p-value
  458.      */
  459.     public boolean tTest(final double mu, final double[] sample, final double alpha)
  460.         throws NullArgumentException, NumberIsTooSmallException,
  461.         OutOfRangeException, MaxCountExceededException {

  462.         checkSignificanceLevel(alpha);
  463.         return tTest(mu, sample) < alpha;
  464.     }

  465.     /**
  466.      * Returns the <i>observed significance level</i>, or
  467.      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
  468.      * comparing the mean of the dataset described by <code>sampleStats</code>
  469.      * with the constant <code>mu</code>.
  470.      * <p>
  471.      * The number returned is the smallest significance level
  472.      * at which one can reject the null hypothesis that the mean equals
  473.      * <code>mu</code> in favor of the two-sided alternative that the mean
  474.      * is different from <code>mu</code>. For a one-sided test, divide the
  475.      * returned value by 2.</p>
  476.      * <p>
  477.      * <strong>Usage Note:</strong><br>
  478.      * The validity of the test depends on the assumptions of the parametric
  479.      * t-test procedure, as discussed
  480.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
  481.      * here</a></p>
  482.      * <p>
  483.      * <strong>Preconditions</strong>: <ul>
  484.      * <li>The sample must contain at least 2 observations.
  485.      * </li></ul>
  486.      *
  487.      * @param mu constant value to compare sample mean against
  488.      * @param sampleStats StatisticalSummary describing sample data
  489.      * @return p-value
  490.      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
  491.      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
  492.      * @throws MaxCountExceededException if an error occurs computing the p-value
  493.      */
  494.     public double tTest(final double mu, final StatisticalSummary sampleStats)
  495.         throws NullArgumentException, NumberIsTooSmallException,
  496.         MaxCountExceededException {

  497.         checkSampleData(sampleStats);
  498.         return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
  499.                      sampleStats.getN());
  500.     }

  501.     /**
  502.      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
  503.      * two-sided t-test</a> evaluating the null hypothesis that the mean of the
  504.      * population from which the dataset described by <code>stats</code> is
  505.      * drawn equals <code>mu</code>.
  506.      * <p>
  507.      * Returns <code>true</code> iff the null hypothesis can be rejected with
  508.      * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
  509.      * <code>alpha * 2.</code></p>
  510.      * <p>
  511.      * <strong>Examples:</strong><br><ol>
  512.      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
  513.      * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
  514.      * </li>
  515.      * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
  516.      * at the 99% level, first verify that the measured sample mean is less
  517.      * than <code>mu</code> and then use
  518.      * <br><code>tTest(mu, sampleStats, 0.02) </code>
  519.      * </li></ol>
  520.      * <p>
  521.      * <strong>Usage Note:</strong><br>
  522.      * The validity of the test depends on the assumptions of the one-sample
  523.      * parametric t-test procedure, as discussed
  524.      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
  525.      * </p><p>
  526.      * <strong>Preconditions</strong>: <ul>
  527.      * <li>The sample must include at least 2 observations.
  528.      * </li></ul>
  529.      *
  530.      * @param mu constant value to compare sample mean against
  531.      * @param sampleStats StatisticalSummary describing sample data values
  532.      * @param alpha significance level of the test
     * @return true if the null hypothesis can be rejected with
     * confidence 1 - alpha
  534.      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
  535.      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
  536.      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
  537.      * @throws MaxCountExceededException if an error occurs computing the p-value
  538.      */
  539.     public boolean tTest(final double mu, final StatisticalSummary sampleStats,
  540.                          final double alpha)
  541.     throws NullArgumentException, NumberIsTooSmallException,
  542.     OutOfRangeException, MaxCountExceededException {

  543.         checkSignificanceLevel(alpha);
  544.         return tTest(mu, sampleStats) < alpha;
  545.     }

  546.     /**
  547.      * Returns the <i>observed significance level</i>, or
  548.      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
  549.      * comparing the means of the input arrays.
  550.      * <p>
  551.      * The number returned is the smallest significance level
  552.      * at which one can reject the null hypothesis that the two means are
  553.      * equal in favor of the two-sided alternative that they are different.
  554.      * For a one-sided test, divide the returned value by 2.</p>
  555.      * <p>
     * The test does not assume that the underlying population variances are
  557.      * equal  and it uses approximated degrees of freedom computed from the
  558.      * sample data to compute the p-value.  The t-statistic used is as defined in
  559.      * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
  560.      * to the degrees of freedom is used,
  561.      * as described
  562.      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
  563.      * here.</a>  To perform the test under the assumption of equal subpopulation
  564.      * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
  565.      * <p>
  566.      * <strong>Usage Note:</strong><br>
  567.      * The validity of the p-value depends on the assumptions of the parametric
  568.      * t-test procedure, as discussed
  569.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
  570.      * here</a></p>
  571.      * <p>
  572.      * <strong>Preconditions</strong>: <ul>
  573.      * <li>The observed array lengths must both be at least 2.
  574.      * </li></ul>
  575.      *
  576.      * @param sample1 array of sample data values
  577.      * @param sample2 array of sample data values
  578.      * @return p-value for t-test
  579.      * @throws NullArgumentException if the arrays are <code>null</code>
  580.      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
  581.      * @throws MaxCountExceededException if an error occurs computing the p-value
  582.      */
  583.     public double tTest(final double[] sample1, final double[] sample2)
  584.         throws NullArgumentException, NumberIsTooSmallException,
  585.         MaxCountExceededException {

  586.         checkSampleData(sample1);
  587.         checkSampleData(sample2);
  588.         // No try-catch or advertised exception because args have just been checked
  589.         return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
  590.                      StatUtils.variance(sample1), StatUtils.variance(sample2),
  591.                      sample1.length, sample2.length);
  592.     }

  593.     /**
  594.      * Returns the <i>observed significance level</i>, or
  595.      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
  596.      * comparing the means of the input arrays, under the assumption that
  597.      * the two samples are drawn from subpopulations with equal variances.
  598.      * To perform the test without the equal variances assumption, use
  599.      * {@link #tTest(double[], double[])}.
  600.      * <p>
  601.      * The number returned is the smallest significance level
  602.      * at which one can reject the null hypothesis that the two means are
  603.      * equal in favor of the two-sided alternative that they are different.
  604.      * For a one-sided test, divide the returned value by 2.</p>
  605.      * <p>
  606.      * A pooled variance estimate is used to compute the t-statistic.  See
  607.      * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
  608.      * minus 2 is used as the degrees of freedom.</p>
  609.      * <p>
  610.      * <strong>Usage Note:</strong><br>
  611.      * The validity of the p-value depends on the assumptions of the parametric
  612.      * t-test procedure, as discussed
  613.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
  614.      * here</a></p>
  615.      * <p>
  616.      * <strong>Preconditions</strong>: <ul>
  617.      * <li>The observed array lengths must both be at least 2.
  618.      * </li></ul>
  619.      *
  620.      * @param sample1 array of sample data values
  621.      * @param sample2 array of sample data values
  622.      * @return p-value for t-test
  623.      * @throws NullArgumentException if the arrays are <code>null</code>
  624.      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
  625.      * @throws MaxCountExceededException if an error occurs computing the p-value
  626.      */
  627.     public double homoscedasticTTest(final double[] sample1, final double[] sample2)
  628.         throws NullArgumentException, NumberIsTooSmallException,
  629.         MaxCountExceededException {

  630.         checkSampleData(sample1);
  631.         checkSampleData(sample2);
  632.         // No try-catch or advertised exception because args have just been checked
  633.         return homoscedasticTTest(StatUtils.mean(sample1),
  634.                                   StatUtils.mean(sample2),
  635.                                   StatUtils.variance(sample1),
  636.                                   StatUtils.variance(sample2),
  637.                                   sample1.length, sample2.length);
  638.     }

  639.     /**
  640.      * Performs a
  641.      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
  642.      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
  643.      * and <code>sample2</code> are drawn from populations with the same mean,
  644.      * with significance level <code>alpha</code>.  This test does not assume
  645.      * that the subpopulation variances are equal.  To perform the test assuming
  646.      * equal variances, use
  647.      * {@link #homoscedasticTTest(double[], double[], double)}.
  648.      * <p>
  649.      * Returns <code>true</code> iff the null hypothesis that the means are
  650.      * equal can be rejected with confidence <code>1 - alpha</code>.  To
  651.      * perform a 1-sided test, use <code>alpha * 2</code></p>
  652.      * <p>
  653.      * See {@link #t(double[], double[])} for the formula used to compute the
  654.      * t-statistic.  Degrees of freedom are approximated using the
  655.      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
  656.      * Welch-Satterthwaite approximation.</a></p>
  657.      * <p>
  658.      * <strong>Examples:</strong><br><ol>
  659.      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
  660.      * the 95% level,  use
  661.      * <br><code>tTest(sample1, sample2, 0.05). </code>
  662.      * </li>
  663.      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>,
  664.      * at the 99% level, first verify that the measured  mean of <code>sample 1</code>
  665.      * is less than the mean of <code>sample 2</code> and then use
  666.      * <br><code>tTest(sample1, sample2, 0.02) </code>
  667.      * </li></ol>
  668.      * <p>
  669.      * <strong>Usage Note:</strong><br>
  670.      * The validity of the test depends on the assumptions of the parametric
  671.      * t-test procedure, as discussed
  672.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
  673.      * here</a></p>
  674.      * <p>
  675.      * <strong>Preconditions</strong>: <ul>
  676.      * <li>The observed array lengths must both be at least 2.
  677.      * </li>
  678.      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
  679.      * </li></ul>
  680.      *
  681.      * @param sample1 array of sample data values
  682.      * @param sample2 array of sample data values
  683.      * @param alpha significance level of the test
  684.      * @return true if the null hypothesis can be rejected with
  685.      * confidence 1 - alpha
  686.      * @throws NullArgumentException if the arrays are <code>null</code>
  687.      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
  688.      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
  689.      * @throws MaxCountExceededException if an error occurs computing the p-value
  690.      */
  691.     public boolean tTest(final double[] sample1, final double[] sample2,
  692.                          final double alpha)
  693.         throws NullArgumentException, NumberIsTooSmallException,
  694.         OutOfRangeException, MaxCountExceededException {

  695.         checkSignificanceLevel(alpha);
  696.         return tTest(sample1, sample2) < alpha;
  697.     }

  698.     /**
  699.      * Performs a
  700.      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
  701.      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
  702.      * and <code>sample2</code> are drawn from populations with the same mean,
  703.      * with significance level <code>alpha</code>,  assuming that the
  704.      * subpopulation variances are equal.  Use
  705.      * {@link #tTest(double[], double[], double)} to perform the test without
  706.      * the assumption of equal variances.
  707.      * <p>
  708.      * Returns <code>true</code> iff the null hypothesis that the means are
  709.      * equal can be rejected with confidence <code>1 - alpha</code>.  To
  710.      * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
  711.      * without the assumption of equal subpopulation variances, use
  712.      * {@link #tTest(double[], double[], double)}.</p>
  713.      * <p>
  714.      * A pooled variance estimate is used to compute the t-statistic. See
  715.      * {@link #t(double[], double[])} for the formula. The sum of the sample
  716.      * sizes minus 2 is used as the degrees of freedom.</p>
  717.      * <p>
  718.      * <strong>Examples:</strong><br><ol>
  719.      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
  720.      * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
  721.      * </li>
  722.      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2, </code>
  723.      * at the 99% level, first verify that the measured mean of
  724.      * <code>sample 1</code> is less than the mean of <code>sample 2</code>
  725.      * and then use
  726.      * <br><code>tTest(sample1, sample2, 0.02) </code>
  727.      * </li></ol>
  728.      * <p>
  729.      * <strong>Usage Note:</strong><br>
  730.      * The validity of the test depends on the assumptions of the parametric
  731.      * t-test procedure, as discussed
  732.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
  733.      * here</a></p>
  734.      * <p>
  735.      * <strong>Preconditions</strong>: <ul>
  736.      * <li>The observed array lengths must both be at least 2.
  737.      * </li>
  738.      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
  739.      * </li></ul>
  740.      *
  741.      * @param sample1 array of sample data values
  742.      * @param sample2 array of sample data values
  743.      * @param alpha significance level of the test
  744.      * @return true if the null hypothesis can be rejected with
  745.      * confidence 1 - alpha
  746.      * @throws NullArgumentException if the arrays are <code>null</code>
  747.      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
  748.      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
  749.      * @throws MaxCountExceededException if an error occurs computing the p-value
  750.      */
  751.     public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
  752.                                       final double alpha)
  753.         throws NullArgumentException, NumberIsTooSmallException,
  754.         OutOfRangeException, MaxCountExceededException {

  755.         checkSignificanceLevel(alpha);
  756.         return homoscedasticTTest(sample1, sample2) < alpha;
  757.     }

  758.     /**
  759.      * Returns the <i>observed significance level</i>, or
  760.      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
  761.      * comparing the means of the datasets described by two StatisticalSummary
  762.      * instances.
  763.      * <p>
  764.      * The number returned is the smallest significance level
  765.      * at which one can reject the null hypothesis that the two means are
  766.      * equal in favor of the two-sided alternative that they are different.
  767.      * For a one-sided test, divide the returned value by 2.</p>
  768.      * <p>
  769.      * The test does not assume that the underlying population variances are
  770.      * equal  and it uses approximated degrees of freedom computed from the
  771.      * sample data to compute the p-value.   To perform the test assuming
  772.      * equal variances, use
  773.      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
  774.      * <p>
  775.      * <strong>Usage Note:</strong><br>
  776.      * The validity of the p-value depends on the assumptions of the parametric
  777.      * t-test procedure, as discussed
  778.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
  779.      * here</a></p>
  780.      * <p>
  781.      * <strong>Preconditions</strong>: <ul>
  782.      * <li>The datasets described by the two Univariates must each contain
  783.      * at least 2 observations.
  784.      * </li></ul>
  785.      *
  786.      * @param sampleStats1  StatisticalSummary describing data from the first sample
  787.      * @param sampleStats2  StatisticalSummary describing data from the second sample
  788.      * @return p-value for t-test
  789.      * @throws NullArgumentException if the sample statistics are <code>null</code>
  790.      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
  791.      * @throws MaxCountExceededException if an error occurs computing the p-value
  792.      */
  793.     public double tTest(final StatisticalSummary sampleStats1,
  794.                         final StatisticalSummary sampleStats2)
  795.         throws NullArgumentException, NumberIsTooSmallException,
  796.         MaxCountExceededException {

  797.         checkSampleData(sampleStats1);
  798.         checkSampleData(sampleStats2);
  799.         return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
  800.                      sampleStats1.getVariance(), sampleStats2.getVariance(),
  801.                      sampleStats1.getN(), sampleStats2.getN());
  802.     }

  803.     /**
  804.      * Returns the <i>observed significance level</i>, or
  805.      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
  806.      * comparing the means of the datasets described by two StatisticalSummary
  807.      * instances, under the hypothesis of equal subpopulation variances. To
  808.      * perform a test without the equal variances assumption, use
  809.      * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
  810.      * <p>
  811.      * The number returned is the smallest significance level
  812.      * at which one can reject the null hypothesis that the two means are
  813.      * equal in favor of the two-sided alternative that they are different.
  814.      * For a one-sided test, divide the returned value by 2.</p>
  815.      * <p>
  816.      * See {@link #homoscedasticT(double[], double[])} for the formula used to
  817.      * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
  818.      * the degrees of freedom.</p>
  819.      * <p>
  820.      * <strong>Usage Note:</strong><br>
  821.      * The validity of the p-value depends on the assumptions of the parametric
  822.      * t-test procedure, as discussed
  823.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
  824.      * </p><p>
  825.      * <strong>Preconditions</strong>: <ul>
  826.      * <li>The datasets described by the two Univariates must each contain
  827.      * at least 2 observations.
  828.      * </li></ul>
  829.      *
  830.      * @param sampleStats1  StatisticalSummary describing data from the first sample
  831.      * @param sampleStats2  StatisticalSummary describing data from the second sample
  832.      * @return p-value for t-test
  833.      * @throws NullArgumentException if the sample statistics are <code>null</code>
  834.      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
  835.      * @throws MaxCountExceededException if an error occurs computing the p-value
  836.      */
  837.     public double homoscedasticTTest(final StatisticalSummary sampleStats1,
  838.                                      final StatisticalSummary sampleStats2)
  839.         throws NullArgumentException, NumberIsTooSmallException,
  840.         MaxCountExceededException {

  841.         checkSampleData(sampleStats1);
  842.         checkSampleData(sampleStats2);
  843.         return homoscedasticTTest(sampleStats1.getMean(),
  844.                                   sampleStats2.getMean(),
  845.                                   sampleStats1.getVariance(),
  846.                                   sampleStats2.getVariance(),
  847.                                   sampleStats1.getN(), sampleStats2.getN());
  848.     }

  849.     /**
  850.      * Performs a
  851.      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
  852.      * two-sided t-test</a> evaluating the null hypothesis that
  853.      * <code>sampleStats1</code> and <code>sampleStats2</code> describe
  854.      * datasets drawn from populations with the same mean, with significance
  855.      * level <code>alpha</code>.   This test does not assume that the
  856.      * subpopulation variances are equal.  To perform the test under the equal
  857.      * variances assumption, use
  858.      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
  859.      * <p>
  860.      * Returns <code>true</code> iff the null hypothesis that the means are
  861.      * equal can be rejected with confidence <code>1 - alpha</code>.  To
  862.      * perform a 1-sided test, use <code>alpha * 2</code></p>
  863.      * <p>
  864.      * See {@link #t(double[], double[])} for the formula used to compute the
  865.      * t-statistic.  Degrees of freedom are approximated using the
  866.      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
  867.      * Welch-Satterthwaite approximation.</a></p>
  868.      * <p>
  869.      * <strong>Examples:</strong><br><ol>
  870.      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
  871.      * the 95%, use
  872.      * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
  873.      * </li>
  874.      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
  875.      * at the 99% level,  first verify that the measured mean of
  876.      * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
  877.      * and then use
  878.      * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
  879.      * </li></ol>
  880.      * <p>
  881.      * <strong>Usage Note:</strong><br>
  882.      * The validity of the test depends on the assumptions of the parametric
  883.      * t-test procedure, as discussed
  884.      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
  885.      * here</a></p>
  886.      * <p>
  887.      * <strong>Preconditions</strong>: <ul>
  888.      * <li>The datasets described by the two Univariates must each contain
  889.      * at least 2 observations.
  890.      * </li>
  891.      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
  892.      * </li></ul>
  893.      *
  894.      * @param sampleStats1 StatisticalSummary describing sample data values
  895.      * @param sampleStats2 StatisticalSummary describing sample data values
  896.      * @param alpha significance level of the test
  897.      * @return true if the null hypothesis can be rejected with
  898.      * confidence 1 - alpha
  899.      * @throws NullArgumentException if the sample statistics are <code>null</code>
  900.      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
  901.      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
  902.      * @throws MaxCountExceededException if an error occurs computing the p-value
  903.      */
  904.     public boolean tTest(final StatisticalSummary sampleStats1,
  905.                          final StatisticalSummary sampleStats2,
  906.                          final double alpha)
  907.         throws NullArgumentException, NumberIsTooSmallException,
  908.         OutOfRangeException, MaxCountExceededException {

  909.         checkSignificanceLevel(alpha);
  910.         return tTest(sampleStats1, sampleStats2) < alpha;
  911.     }

  912.     //----------------------------------------------- Protected methods

  913.     /**
  914.      * Computes approximate degrees of freedom for 2-sample t-test.
  915.      *
  916.      * @param v1 first sample variance
  917.      * @param v2 second sample variance
  918.      * @param n1 first sample n
  919.      * @param n2 second sample n
  920.      * @return approximate degrees of freedom
  921.      */
  922.     protected double df(double v1, double v2, double n1, double n2) {
  923.         return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
  924.         ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
  925.                 (n2 * n2 * (n2 - 1d)));
  926.     }

  927.     /**
  928.      * Computes t test statistic for 1-sample t-test.
  929.      *
  930.      * @param m sample mean
  931.      * @param mu constant to test against
  932.      * @param v sample variance
  933.      * @param n sample n
  934.      * @return t test statistic
  935.      */
  936.     protected double t(final double m, final double mu,
  937.                        final double v, final double n) {
  938.         return (m - mu) / JdkMath.sqrt(v / n);
  939.     }

  940.     /**
  941.      * Computes t test statistic for 2-sample t-test.
  942.      * <p>
  943.      * Does not assume that subpopulation variances are equal.</p>
  944.      *
  945.      * @param m1 first sample mean
  946.      * @param m2 second sample mean
  947.      * @param v1 first sample variance
  948.      * @param v2 second sample variance
  949.      * @param n1 first sample n
  950.      * @param n2 second sample n
  951.      * @return t test statistic
  952.      */
  953.     protected double t(final double m1, final double m2,
  954.                        final double v1, final double v2,
  955.                        final double n1, final double n2)  {
  956.         return (m1 - m2) / JdkMath.sqrt((v1 / n1) + (v2 / n2));
  957.     }

  958.     /**
  959.      * Computes t test statistic for 2-sample t-test under the hypothesis
  960.      * of equal subpopulation variances.
  961.      *
  962.      * @param m1 first sample mean
  963.      * @param m2 second sample mean
  964.      * @param v1 first sample variance
  965.      * @param v2 second sample variance
  966.      * @param n1 first sample n
  967.      * @param n2 second sample n
  968.      * @return t test statistic
  969.      */
  970.     protected double homoscedasticT(final double m1, final double m2,
  971.                                     final double v1, final double v2,
  972.                                     final double n1, final double n2)  {
  973.         final double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
  974.         return (m1 - m2) / JdkMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
  975.     }

  976.     /**
  977.      * Computes p-value for 2-sided, 1-sample t-test.
  978.      *
  979.      * @param m sample mean
  980.      * @param mu constant to test against
  981.      * @param v sample variance
  982.      * @param n sample n
  983.      * @return p-value
  984.      * @throws MaxCountExceededException if an error occurs computing the p-value
  985.      * @throws MathIllegalArgumentException if n is not greater than 1
  986.      */
  987.     protected double tTest(final double m, final double mu,
  988.                            final double v, final double n)
  989.         throws MaxCountExceededException, MathIllegalArgumentException {

  990.         final double t = JdkMath.abs(t(m, mu, v, n));
  991.         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
  992.         final TDistribution distribution = TDistribution.of(n - 1);
  993.         return 2.0 * distribution.cumulativeProbability(-t);
  994.     }

  995.     /**
  996.      * Computes p-value for 2-sided, 2-sample t-test.
  997.      * <p>
  998.      * Does not assume subpopulation variances are equal. Degrees of freedom
  999.      * are estimated from the data.</p>
  1000.      *
  1001.      * @param m1 first sample mean
  1002.      * @param m2 second sample mean
  1003.      * @param v1 first sample variance
  1004.      * @param v2 second sample variance
  1005.      * @param n1 first sample n
  1006.      * @param n2 second sample n
  1007.      * @return p-value
  1008.      * @throws MaxCountExceededException if an error occurs computing the p-value
  1009.      * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
  1010.      * strictly positive
  1011.      */
  1012.     protected double tTest(final double m1, final double m2,
  1013.                            final double v1, final double v2,
  1014.                            final double n1, final double n2)
  1015.         throws MaxCountExceededException, NotStrictlyPositiveException {

  1016.         final double t = JdkMath.abs(t(m1, m2, v1, v2, n1, n2));
  1017.         final double degreesOfFreedom = df(v1, v2, n1, n2);
  1018.         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
  1019.         final TDistribution distribution = TDistribution.of(degreesOfFreedom);
  1020.         return 2.0 * distribution.cumulativeProbability(-t);
  1021.     }

  1022.     /**
  1023.      * Computes p-value for 2-sided, 2-sample t-test, under the assumption
  1024.      * of equal subpopulation variances.
  1025.      * <p>
  1026.      * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
  1027.      *
  1028.      * @param m1 first sample mean
  1029.      * @param m2 second sample mean
  1030.      * @param v1 first sample variance
  1031.      * @param v2 second sample variance
  1032.      * @param n1 first sample n
  1033.      * @param n2 second sample n
  1034.      * @return p-value
  1035.      * @throws MaxCountExceededException if an error occurs computing the p-value
  1036.      * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
  1037.      * strictly positive
  1038.      */
  1039.     protected double homoscedasticTTest(double m1, double m2,
  1040.                                         double v1, double v2,
  1041.                                         double n1, double n2)
  1042.         throws MaxCountExceededException, NotStrictlyPositiveException {

  1043.         final double t = JdkMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
  1044.         final double degreesOfFreedom = n1 + n2 - 2;
  1045.         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
  1046.         final TDistribution distribution = TDistribution.of(degreesOfFreedom);
  1047.         return 2.0 * distribution.cumulativeProbability(-t);
  1048.     }

  1049.     /**
  1050.      * Check significance level.
  1051.      *
  1052.      * @param alpha significance level
  1053.      * @throws OutOfRangeException if the significance level is out of bounds.
  1054.      */
  1055.     private void checkSignificanceLevel(final double alpha)
  1056.         throws OutOfRangeException {

  1057.         if (alpha <= 0 || alpha > 0.5) {
  1058.             throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
  1059.                                           alpha, 0.0, 0.5);
  1060.         }
  1061.     }

  1062.     /**
  1063.      * Check sample data.
  1064.      *
  1065.      * @param data Sample data.
  1066.      * @throws NullArgumentException if {@code data} is {@code null}.
  1067.      * @throws NumberIsTooSmallException if there is not enough sample data.
  1068.      */
  1069.     private void checkSampleData(final double[] data)
  1070.         throws NullArgumentException, NumberIsTooSmallException {

  1071.         if (data == null) {
  1072.             throw new NullArgumentException();
  1073.         }
  1074.         if (data.length < 2) {
  1075.             throw new NumberIsTooSmallException(
  1076.                     LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
  1077.                     data.length, 2, true);
  1078.         }
  1079.     }

  1080.     /**
  1081.      * Check sample data.
  1082.      *
  1083.      * @param stat Statistical summary.
  1084.      * @throws NullArgumentException if {@code data} is {@code null}.
  1085.      * @throws NumberIsTooSmallException if there is not enough sample data.
  1086.      */
  1087.     private void checkSampleData(final StatisticalSummary stat)
  1088.         throws NullArgumentException, NumberIsTooSmallException {

  1089.         if (stat == null) {
  1090.             throw new NullArgumentException();
  1091.         }
  1092.         if (stat.getN() < 2) {
  1093.             throw new NumberIsTooSmallException(
  1094.                     LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
  1095.                     stat.getN(), 2, true);
  1096.         }
  1097.     }
  1098. }