001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math4.legacy.stat.inference;
018
019import org.apache.commons.statistics.distribution.TDistribution;
020import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
021import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
022import org.apache.commons.math4.legacy.exception.MaxCountExceededException;
023import org.apache.commons.math4.legacy.exception.NoDataException;
024import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
025import org.apache.commons.math4.legacy.exception.NullArgumentException;
026import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
027import org.apache.commons.math4.legacy.exception.OutOfRangeException;
028import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
029import org.apache.commons.math4.legacy.stat.StatUtils;
030import org.apache.commons.math4.legacy.stat.descriptive.StatisticalSummary;
031import org.apache.commons.math4.core.jdkmath.JdkMath;
032
033/**
034 * An implementation for Student's t-tests.
035 * <p>
036 * Tests can be:<ul>
037 * <li>One-sample or two-sample</li>
038 * <li>One-sided or two-sided</li>
039 * <li>Paired or unpaired (for two-sample tests)</li>
040 * <li>Homoscedastic (equal variance assumption) or heteroscedastic
041 * (for two sample tests)</li>
042 * <li>Fixed significance level (boolean-valued) or returning p-values.
043 * </li></ul>
044 * <p>
 * Test statistics are available for all tests.  Methods including "Test" in
 * their names perform tests; all other methods return t-statistics.  Among
047 * the "Test" methods, <code>double-</code>valued methods return p-values;
048 * <code>boolean-</code>valued methods perform fixed significance level tests.
049 * Significance levels are always specified as numbers between 0 and 0.5
050 * (e.g. tests at the 95% level  use <code>alpha=0.05</code>).</p>
051 * <p>
052 * Input to tests can be either <code>double[]</code> arrays or
053 * {@link StatisticalSummary} instances.</p><p>
 * Uses the Commons Statistics {@link org.apache.commons.statistics.distribution.TDistribution}
 * implementation to compute p-values.</p>
056 *
057 */
058public class TTest {
059    /**
060     * Computes a paired, 2-sample t-statistic based on the data in the input
061     * arrays.  The t-statistic returned is equivalent to what would be returned by
062     * computing the one-sample t-statistic {@link #t(double, double[])}, with
063     * <code>mu = 0</code> and the sample array consisting of the (signed)
064     * differences between corresponding entries in <code>sample1</code> and
065     * <code>sample2.</code>
066     * <p>
067     * <strong>Preconditions</strong>: <ul>
068     * <li>The input arrays must have the same length and their common length
069     * must be at least 2.
070     * </li></ul>
071     *
072     * @param sample1 array of sample data values
073     * @param sample2 array of sample data values
074     * @return t statistic
075     * @throws NullArgumentException if the arrays are <code>null</code>
076     * @throws NoDataException if the arrays are empty
077     * @throws DimensionMismatchException if the length of the arrays is not equal
078     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
079     */
080    public double pairedT(final double[] sample1, final double[] sample2)
081        throws NullArgumentException, NoDataException,
082        DimensionMismatchException, NumberIsTooSmallException {
083
084        checkSampleData(sample1);
085        checkSampleData(sample2);
086        double meanDifference = StatUtils.meanDifference(sample1, sample2);
087        return t(meanDifference, 0,
088                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
089                 sample1.length);
090    }
091
092    /**
093     * Returns the <i>observed significance level</i>, or
094     * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
095     * based on the data in the input arrays.
096     * <p>
097     * The number returned is the smallest significance level
098     * at which one can reject the null hypothesis that the mean of the paired
099     * differences is 0 in favor of the two-sided alternative that the mean paired
100     * difference is not equal to 0. For a one-sided test, divide the returned
101     * value by 2.</p>
102     * <p>
103     * This test is equivalent to a one-sample t-test computed using
104     * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
105     * array consisting of the signed differences between corresponding elements of
106     * <code>sample1</code> and <code>sample2.</code></p>
107     * <p>
108     * <strong>Usage Note:</strong><br>
109     * The validity of the p-value depends on the assumptions of the parametric
110     * t-test procedure, as discussed
111     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
112     * here</a></p>
113     * <p>
114     * <strong>Preconditions</strong>: <ul>
115     * <li>The input array lengths must be the same and their common length must
116     * be at least 2.
117     * </li></ul>
118     *
119     * @param sample1 array of sample data values
120     * @param sample2 array of sample data values
121     * @return p-value for t-test
122     * @throws NullArgumentException if the arrays are <code>null</code>
123     * @throws NoDataException if the arrays are empty
124     * @throws DimensionMismatchException if the length of the arrays is not equal
125     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
126     * @throws MaxCountExceededException if an error occurs computing the p-value
127     */
128    public double pairedTTest(final double[] sample1, final double[] sample2)
129        throws NullArgumentException, NoDataException, DimensionMismatchException,
130        NumberIsTooSmallException, MaxCountExceededException {
131
132        double meanDifference = StatUtils.meanDifference(sample1, sample2);
133        return tTest(meanDifference, 0,
134                StatUtils.varianceDifference(sample1, sample2, meanDifference),
135                sample1.length);
136    }
137
138    /**
139     * Performs a paired t-test evaluating the null hypothesis that the
140     * mean of the paired differences between <code>sample1</code> and
141     * <code>sample2</code> is 0 in favor of the two-sided alternative that the
142     * mean paired difference is not equal to 0, with significance level
143     * <code>alpha</code>.
144     * <p>
145     * Returns <code>true</code> iff the null hypothesis can be rejected with
146     * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
147     * <code>alpha * 2</code></p>
148     * <p>
149     * <strong>Usage Note:</strong><br>
150     * The validity of the test depends on the assumptions of the parametric
151     * t-test procedure, as discussed
152     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
153     * here</a></p>
154     * <p>
155     * <strong>Preconditions</strong>: <ul>
156     * <li>The input array lengths must be the same and their common length
157     * must be at least 2.
158     * </li>
     * <li> <code> 0 &lt; alpha &le; 0.5 </code>
160     * </li></ul>
161     *
162     * @param sample1 array of sample data values
163     * @param sample2 array of sample data values
164     * @param alpha significance level of the test
165     * @return true if the null hypothesis can be rejected with
166     * confidence 1 - alpha
167     * @throws NullArgumentException if the arrays are <code>null</code>
168     * @throws NoDataException if the arrays are empty
169     * @throws DimensionMismatchException if the length of the arrays is not equal
170     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
171     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
172     * @throws MaxCountExceededException if an error occurs computing the p-value
173     */
174    public boolean pairedTTest(final double[] sample1, final double[] sample2,
175                               final double alpha)
176        throws NullArgumentException, NoDataException, DimensionMismatchException,
177        NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {
178
179        checkSignificanceLevel(alpha);
180        return pairedTTest(sample1, sample2) < alpha;
181    }
182
183    /**
184     * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
185     * t statistic </a> given observed values and a comparison constant.
186     * <p>
187     * This statistic can be used to perform a one sample t-test for the mean.
188     * </p><p>
189     * <strong>Preconditions</strong>: <ul>
190     * <li>The observed array length must be at least 2.
191     * </li></ul>
192     *
193     * @param mu comparison constant
194     * @param observed array of values
195     * @return t statistic
196     * @throws NullArgumentException if <code>observed</code> is <code>null</code>
197     * @throws NumberIsTooSmallException if the length of <code>observed</code> is &lt; 2
198     */
199    public double t(final double mu, final double[] observed)
200        throws NullArgumentException, NumberIsTooSmallException {
201
202        checkSampleData(observed);
203        // No try-catch or advertised exception because args have just been checked
204        return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
205                observed.length);
206    }
207
208    /**
209     * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
210     * t statistic </a> to use in comparing the mean of the dataset described by
211     * <code>sampleStats</code> to <code>mu</code>.
212     * <p>
213     * This statistic can be used to perform a one sample t-test for the mean.
214     * </p><p>
215     * <strong>Preconditions</strong>: <ul>
     * <li><code>sampleStats.getN() &ge; 2</code>.
217     * </li></ul>
218     *
219     * @param mu comparison constant
     * @param sampleStats StatisticalSummary holding sample summary statistics
221     * @return t statistic
222     * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
223     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
224     */
225    public double t(final double mu, final StatisticalSummary sampleStats)
226        throws NullArgumentException, NumberIsTooSmallException {
227
228        checkSampleData(sampleStats);
229        return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
230                 sampleStats.getN());
231    }
232
233    /**
234     * Computes a 2-sample t statistic,  under the hypothesis of equal
235     * subpopulation variances.  To compute a t-statistic without the
236     * equal variances hypothesis, use {@link #t(double[], double[])}.
237     * <p>
238     * This statistic can be used to perform a (homoscedastic) two-sample
239     * t-test to compare sample means.</p>
240     * <p>
241     * The t-statistic is</p>
242     * <p>
243     * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
244     * </p><p>
245     * where <strong><code>n1</code></strong> is the size of first sample;
246     * <strong><code> n2</code></strong> is the size of second sample;
247     * <strong><code> m1</code></strong> is the mean of first sample;
248     * <strong><code> m2</code></strong> is the mean of second sample
249     * and <strong><code>var</code></strong> is the pooled variance estimate:
250     * </p><p>
     * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
252     * </p><p>
253     * with <strong><code>var1</code></strong> the variance of the first sample and
254     * <strong><code>var2</code></strong> the variance of the second sample.
255     * </p><p>
256     * <strong>Preconditions</strong>: <ul>
257     * <li>The observed array lengths must both be at least 2.
258     * </li></ul>
259     *
260     * @param sample1 array of sample data values
261     * @param sample2 array of sample data values
262     * @return t statistic
263     * @throws NullArgumentException if the arrays are <code>null</code>
264     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
265     */
266    public double homoscedasticT(final double[] sample1, final double[] sample2)
267        throws NullArgumentException, NumberIsTooSmallException {
268
269        checkSampleData(sample1);
270        checkSampleData(sample2);
271        // No try-catch or advertised exception because args have just been checked
272        return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
273                              StatUtils.variance(sample1), StatUtils.variance(sample2),
274                              sample1.length, sample2.length);
275    }
276
277    /**
278     * Computes a 2-sample t statistic, without the hypothesis of equal
279     * subpopulation variances.  To compute a t-statistic assuming equal
280     * variances, use {@link #homoscedasticT(double[], double[])}.
281     * <p>
282     * This statistic can be used to perform a two-sample t-test to compare
283     * sample means.</p>
284     * <p>
285     * The t-statistic is</p>
286     * <p>
287     * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
288     * </p><p>
     * where <strong><code>n1</code></strong> is the size of the first sample;
290     * <strong><code> n2</code></strong> is the size of the second sample;
291     * <strong><code> m1</code></strong> is the mean of the first sample;
292     * <strong><code> m2</code></strong> is the mean of the second sample;
293     * <strong><code> var1</code></strong> is the variance of the first sample;
294     * <strong><code> var2</code></strong> is the variance of the second sample;
295     * </p><p>
296     * <strong>Preconditions</strong>: <ul>
297     * <li>The observed array lengths must both be at least 2.
298     * </li></ul>
299     *
300     * @param sample1 array of sample data values
301     * @param sample2 array of sample data values
302     * @return t statistic
303     * @throws NullArgumentException if the arrays are <code>null</code>
304     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
305     */
306    public double t(final double[] sample1, final double[] sample2)
307        throws NullArgumentException, NumberIsTooSmallException {
308
309        checkSampleData(sample1);
310        checkSampleData(sample2);
311        // No try-catch or advertised exception because args have just been checked
312        return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
313                 StatUtils.variance(sample1), StatUtils.variance(sample2),
314                 sample1.length, sample2.length);
315    }
316
317    /**
318     * Computes a 2-sample t statistic, comparing the means of the datasets
319     * described by two {@link StatisticalSummary} instances, without the
320     * assumption of equal subpopulation variances.  Use
321     * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
322     * compute a t-statistic under the equal variances assumption.
323     * <p>
324     * This statistic can be used to perform a two-sample t-test to compare
325     * sample means.</p>
326     * <p>
     * The returned t-statistic is</p>
328     * <p>
329     * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
330     * </p><p>
331     * where <strong><code>n1</code></strong> is the size of the first sample;
332     * <strong><code> n2</code></strong> is the size of the second sample;
333     * <strong><code> m1</code></strong> is the mean of the first sample;
     * <strong><code> m2</code></strong> is the mean of the second sample;
335     * <strong><code> var1</code></strong> is the variance of the first sample;
336     * <strong><code> var2</code></strong> is the variance of the second sample
337     * </p><p>
338     * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two {@link StatisticalSummary} instances
     * must each contain at least 2 observations.
341     * </li></ul>
342     *
343     * @param sampleStats1 StatisticalSummary describing data from the first sample
344     * @param sampleStats2 StatisticalSummary describing data from the second sample
345     * @return t statistic
346     * @throws NullArgumentException if the sample statistics are <code>null</code>
347     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
348     */
349    public double t(final StatisticalSummary sampleStats1,
350                    final StatisticalSummary sampleStats2)
351        throws NullArgumentException, NumberIsTooSmallException {
352
353        checkSampleData(sampleStats1);
354        checkSampleData(sampleStats2);
355        return t(sampleStats1.getMean(), sampleStats2.getMean(),
356                 sampleStats1.getVariance(), sampleStats2.getVariance(),
357                 sampleStats1.getN(), sampleStats2.getN());
358    }
359
360    /**
361     * Computes a 2-sample t statistic, comparing the means of the datasets
362     * described by two {@link StatisticalSummary} instances, under the
363     * assumption of equal subpopulation variances.  To compute a t-statistic
364     * without the equal variances assumption, use
365     * {@link #t(StatisticalSummary, StatisticalSummary)}.
366     * <p>
367     * This statistic can be used to perform a (homoscedastic) two-sample
368     * t-test to compare sample means.</p>
369     * <p>
370     * The t-statistic returned is</p>
371     * <p>
372     * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
373     * </p><p>
374     * where <strong><code>n1</code></strong> is the size of first sample;
375     * <strong><code> n2</code></strong> is the size of second sample;
376     * <strong><code> m1</code></strong> is the mean of first sample;
377     * <strong><code> m2</code></strong> is the mean of second sample
378     * and <strong><code>var</code></strong> is the pooled variance estimate:
379     * </p><p>
     * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
381     * </p><p>
382     * with <strong><code>var1</code></strong> the variance of the first sample and
383     * <strong><code>var2</code></strong> the variance of the second sample.
384     * </p><p>
385     * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two {@link StatisticalSummary} instances
     * must each contain at least 2 observations.
388     * </li></ul>
389     *
390     * @param sampleStats1 StatisticalSummary describing data from the first sample
391     * @param sampleStats2 StatisticalSummary describing data from the second sample
392     * @return t statistic
393     * @throws NullArgumentException if the sample statistics are <code>null</code>
394     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
395     */
396    public double homoscedasticT(final StatisticalSummary sampleStats1,
397                                 final StatisticalSummary sampleStats2)
398        throws NullArgumentException, NumberIsTooSmallException {
399
400        checkSampleData(sampleStats1);
401        checkSampleData(sampleStats2);
402        return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
403                              sampleStats1.getVariance(), sampleStats2.getVariance(),
404                              sampleStats1.getN(), sampleStats2.getN());
405    }
406
407    /**
408     * Returns the <i>observed significance level</i>, or
409     * <i>p-value</i>, associated with a one-sample, two-tailed t-test
410     * comparing the mean of the input array with the constant <code>mu</code>.
411     * <p>
412     * The number returned is the smallest significance level
413     * at which one can reject the null hypothesis that the mean equals
414     * <code>mu</code> in favor of the two-sided alternative that the mean
415     * is different from <code>mu</code>. For a one-sided test, divide the
416     * returned value by 2.</p>
417     * <p>
418     * <strong>Usage Note:</strong><br>
419     * The validity of the test depends on the assumptions of the parametric
420     * t-test procedure, as discussed
421     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
422     * </p><p>
423     * <strong>Preconditions</strong>: <ul>
424     * <li>The observed array length must be at least 2.
425     * </li></ul>
426     *
427     * @param mu constant value to compare sample mean against
428     * @param sample array of sample data values
429     * @return p-value
430     * @throws NullArgumentException if the sample array is <code>null</code>
431     * @throws NumberIsTooSmallException if the length of the array is &lt; 2
432     * @throws MaxCountExceededException if an error occurs computing the p-value
433     */
434    public double tTest(final double mu, final double[] sample)
435        throws NullArgumentException, NumberIsTooSmallException,
436        MaxCountExceededException {
437
438        checkSampleData(sample);
439        // No try-catch or advertised exception because args have just been checked
440        return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
441                     sample.length);
442    }
443
444    /**
445     * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
446     * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
447     * which <code>sample</code> is drawn equals <code>mu</code>.
448     * <p>
449     * Returns <code>true</code> iff the null hypothesis can be
450     * rejected with confidence <code>1 - alpha</code>.  To
451     * perform a 1-sided test, use <code>alpha * 2</code></p>
452     * <p>
453     * <strong>Examples:</strong><br><ol>
454     * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
455     * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
456     * </li>
457     * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
458     * at the 99% level, first verify that the measured sample mean is less
459     * than <code>mu</code> and then use
460     * <br><code>tTest(mu, sample, 0.02) </code>
461     * </li></ol>
462     * <p>
463     * <strong>Usage Note:</strong><br>
464     * The validity of the test depends on the assumptions of the one-sample
465     * parametric t-test procedure, as discussed
466     * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
467     * </p><p>
468     * <strong>Preconditions</strong>: <ul>
469     * <li>The observed array length must be at least 2.
470     * </li></ul>
471     *
472     * @param mu constant value to compare sample mean against
473     * @param sample array of sample data values
474     * @param alpha significance level of the test
     * @return true if the null hypothesis can be rejected with confidence 1 - alpha
476     * @throws NullArgumentException if the sample array is <code>null</code>
477     * @throws NumberIsTooSmallException if the length of the array is &lt; 2
478     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
     * @throws MaxCountExceededException if an error occurs computing the p-value
480     */
481    public boolean tTest(final double mu, final double[] sample, final double alpha)
482        throws NullArgumentException, NumberIsTooSmallException,
483        OutOfRangeException, MaxCountExceededException {
484
485        checkSignificanceLevel(alpha);
486        return tTest(mu, sample) < alpha;
487    }
488
489    /**
490     * Returns the <i>observed significance level</i>, or
491     * <i>p-value</i>, associated with a one-sample, two-tailed t-test
492     * comparing the mean of the dataset described by <code>sampleStats</code>
493     * with the constant <code>mu</code>.
494     * <p>
495     * The number returned is the smallest significance level
496     * at which one can reject the null hypothesis that the mean equals
497     * <code>mu</code> in favor of the two-sided alternative that the mean
498     * is different from <code>mu</code>. For a one-sided test, divide the
499     * returned value by 2.</p>
500     * <p>
501     * <strong>Usage Note:</strong><br>
502     * The validity of the test depends on the assumptions of the parametric
503     * t-test procedure, as discussed
504     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
505     * here</a></p>
506     * <p>
507     * <strong>Preconditions</strong>: <ul>
508     * <li>The sample must contain at least 2 observations.
509     * </li></ul>
510     *
511     * @param mu constant value to compare sample mean against
512     * @param sampleStats StatisticalSummary describing sample data
513     * @return p-value
514     * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
515     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
516     * @throws MaxCountExceededException if an error occurs computing the p-value
517     */
518    public double tTest(final double mu, final StatisticalSummary sampleStats)
519        throws NullArgumentException, NumberIsTooSmallException,
520        MaxCountExceededException {
521
522        checkSampleData(sampleStats);
523        return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
524                     sampleStats.getN());
525    }
526
527    /**
528     * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
529     * two-sided t-test</a> evaluating the null hypothesis that the mean of the
     * population from which the dataset described by <code>sampleStats</code> is
531     * drawn equals <code>mu</code>.
532     * <p>
533     * Returns <code>true</code> iff the null hypothesis can be rejected with
534     * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
535     * <code>alpha * 2.</code></p>
536     * <p>
537     * <strong>Examples:</strong><br><ol>
538     * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
539     * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
540     * </li>
541     * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
542     * at the 99% level, first verify that the measured sample mean is less
543     * than <code>mu</code> and then use
544     * <br><code>tTest(mu, sampleStats, 0.02) </code>
545     * </li></ol>
546     * <p>
547     * <strong>Usage Note:</strong><br>
548     * The validity of the test depends on the assumptions of the one-sample
549     * parametric t-test procedure, as discussed
550     * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
551     * </p><p>
552     * <strong>Preconditions</strong>: <ul>
553     * <li>The sample must include at least 2 observations.
554     * </li></ul>
555     *
556     * @param mu constant value to compare sample mean against
557     * @param sampleStats StatisticalSummary describing sample data values
558     * @param alpha significance level of the test
     * @return true if the null hypothesis can be rejected with confidence 1 - alpha
560     * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
561     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
562     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
563     * @throws MaxCountExceededException if an error occurs computing the p-value
564     */
565    public boolean tTest(final double mu, final StatisticalSummary sampleStats,
566                         final double alpha)
567    throws NullArgumentException, NumberIsTooSmallException,
568    OutOfRangeException, MaxCountExceededException {
569
570        checkSignificanceLevel(alpha);
571        return tTest(mu, sampleStats) < alpha;
572    }
573
574    /**
575     * Returns the <i>observed significance level</i>, or
576     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
577     * comparing the means of the input arrays.
578     * <p>
579     * The number returned is the smallest significance level
580     * at which one can reject the null hypothesis that the two means are
581     * equal in favor of the two-sided alternative that they are different.
582     * For a one-sided test, divide the returned value by 2.</p>
583     * <p>
     * The test does not assume that the underlying population variances are
     * equal and it uses approximated degrees of freedom computed from the
586     * sample data to compute the p-value.  The t-statistic used is as defined in
587     * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
588     * to the degrees of freedom is used,
589     * as described
590     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
591     * here.</a>  To perform the test under the assumption of equal subpopulation
592     * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
593     * <p>
594     * <strong>Usage Note:</strong><br>
595     * The validity of the p-value depends on the assumptions of the parametric
596     * t-test procedure, as discussed
597     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
598     * here</a></p>
599     * <p>
600     * <strong>Preconditions</strong>: <ul>
601     * <li>The observed array lengths must both be at least 2.
602     * </li></ul>
603     *
604     * @param sample1 array of sample data values
605     * @param sample2 array of sample data values
606     * @return p-value for t-test
607     * @throws NullArgumentException if the arrays are <code>null</code>
608     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
609     * @throws MaxCountExceededException if an error occurs computing the p-value
610     */
611    public double tTest(final double[] sample1, final double[] sample2)
612        throws NullArgumentException, NumberIsTooSmallException,
613        MaxCountExceededException {
614
615        checkSampleData(sample1);
616        checkSampleData(sample2);
617        // No try-catch or advertised exception because args have just been checked
618        return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
619                     StatUtils.variance(sample1), StatUtils.variance(sample2),
620                     sample1.length, sample2.length);
621    }
622
623    /**
624     * Returns the <i>observed significance level</i>, or
625     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
626     * comparing the means of the input arrays, under the assumption that
627     * the two samples are drawn from subpopulations with equal variances.
628     * To perform the test without the equal variances assumption, use
629     * {@link #tTest(double[], double[])}.
630     * <p>
631     * The number returned is the smallest significance level
632     * at which one can reject the null hypothesis that the two means are
633     * equal in favor of the two-sided alternative that they are different.
634     * For a one-sided test, divide the returned value by 2.</p>
635     * <p>
636     * A pooled variance estimate is used to compute the t-statistic.  See
637     * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
638     * minus 2 is used as the degrees of freedom.</p>
639     * <p>
640     * <strong>Usage Note:</strong><br>
641     * The validity of the p-value depends on the assumptions of the parametric
642     * t-test procedure, as discussed
643     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
644     * here</a></p>
645     * <p>
646     * <strong>Preconditions</strong>: <ul>
647     * <li>The observed array lengths must both be at least 2.
648     * </li></ul>
649     *
650     * @param sample1 array of sample data values
651     * @param sample2 array of sample data values
652     * @return p-value for t-test
653     * @throws NullArgumentException if the arrays are <code>null</code>
654     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
655     * @throws MaxCountExceededException if an error occurs computing the p-value
656     */
657    public double homoscedasticTTest(final double[] sample1, final double[] sample2)
658        throws NullArgumentException, NumberIsTooSmallException,
659        MaxCountExceededException {
660
661        checkSampleData(sample1);
662        checkSampleData(sample2);
663        // No try-catch or advertised exception because args have just been checked
664        return homoscedasticTTest(StatUtils.mean(sample1),
665                                  StatUtils.mean(sample2),
666                                  StatUtils.variance(sample1),
667                                  StatUtils.variance(sample2),
668                                  sample1.length, sample2.length);
669    }
670
671    /**
672     * Performs a
673     * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
674     * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
675     * and <code>sample2</code> are drawn from populations with the same mean,
676     * with significance level <code>alpha</code>.  This test does not assume
677     * that the subpopulation variances are equal.  To perform the test assuming
678     * equal variances, use
679     * {@link #homoscedasticTTest(double[], double[], double)}.
680     * <p>
681     * Returns <code>true</code> iff the null hypothesis that the means are
682     * equal can be rejected with confidence <code>1 - alpha</code>.  To
683     * perform a 1-sided test, use <code>alpha * 2</code></p>
684     * <p>
685     * See {@link #t(double[], double[])} for the formula used to compute the
686     * t-statistic.  Degrees of freedom are approximated using the
687     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
688     * Welch-Satterthwaite approximation.</a></p>
689     * <p>
690     * <strong>Examples:</strong><br><ol>
691     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
692     * the 95% level,  use
693     * <br><code>tTest(sample1, sample2, 0.05). </code>
694     * </li>
695     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>,
696     * at the 99% level, first verify that the measured  mean of <code>sample 1</code>
697     * is less than the mean of <code>sample 2</code> and then use
698     * <br><code>tTest(sample1, sample2, 0.02) </code>
699     * </li></ol>
700     * <p>
701     * <strong>Usage Note:</strong><br>
702     * The validity of the test depends on the assumptions of the parametric
703     * t-test procedure, as discussed
704     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
705     * here</a></p>
706     * <p>
707     * <strong>Preconditions</strong>: <ul>
708     * <li>The observed array lengths must both be at least 2.
709     * </li>
     * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
711     * </li></ul>
712     *
713     * @param sample1 array of sample data values
714     * @param sample2 array of sample data values
715     * @param alpha significance level of the test
716     * @return true if the null hypothesis can be rejected with
717     * confidence 1 - alpha
718     * @throws NullArgumentException if the arrays are <code>null</code>
719     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
720     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
721     * @throws MaxCountExceededException if an error occurs computing the p-value
722     */
723    public boolean tTest(final double[] sample1, final double[] sample2,
724                         final double alpha)
725        throws NullArgumentException, NumberIsTooSmallException,
726        OutOfRangeException, MaxCountExceededException {
727
728        checkSignificanceLevel(alpha);
729        return tTest(sample1, sample2) < alpha;
730    }
731
732    /**
733     * Performs a
734     * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
735     * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
736     * and <code>sample2</code> are drawn from populations with the same mean,
737     * with significance level <code>alpha</code>,  assuming that the
738     * subpopulation variances are equal.  Use
739     * {@link #tTest(double[], double[], double)} to perform the test without
740     * the assumption of equal variances.
741     * <p>
742     * Returns <code>true</code> iff the null hypothesis that the means are
743     * equal can be rejected with confidence <code>1 - alpha</code>.  To
744     * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
745     * without the assumption of equal subpopulation variances, use
746     * {@link #tTest(double[], double[], double)}.</p>
747     * <p>
     * A pooled variance estimate is used to compute the t-statistic. See
     * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the
     * sample sizes minus 2 is used as the degrees of freedom.</p>
751     * <p>
752     * <strong>Examples:</strong><br><ol>
     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
     * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code>
     * </li>
     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
     * at the 99% level, first verify that the measured mean of
     * <code>sample 1</code> is less than the mean of <code>sample 2</code>
     * and then use
     * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
761     * </li></ol>
762     * <p>
763     * <strong>Usage Note:</strong><br>
764     * The validity of the test depends on the assumptions of the parametric
765     * t-test procedure, as discussed
766     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
767     * here</a></p>
768     * <p>
769     * <strong>Preconditions</strong>: <ul>
770     * <li>The observed array lengths must both be at least 2.
771     * </li>
     * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
773     * </li></ul>
774     *
775     * @param sample1 array of sample data values
776     * @param sample2 array of sample data values
777     * @param alpha significance level of the test
778     * @return true if the null hypothesis can be rejected with
779     * confidence 1 - alpha
780     * @throws NullArgumentException if the arrays are <code>null</code>
781     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
782     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
783     * @throws MaxCountExceededException if an error occurs computing the p-value
784     */
785    public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
786                                      final double alpha)
787        throws NullArgumentException, NumberIsTooSmallException,
788        OutOfRangeException, MaxCountExceededException {
789
790        checkSignificanceLevel(alpha);
791        return homoscedasticTTest(sample1, sample2) < alpha;
792    }
793
794    /**
795     * Returns the <i>observed significance level</i>, or
796     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
797     * comparing the means of the datasets described by two StatisticalSummary
798     * instances.
799     * <p>
800     * The number returned is the smallest significance level
801     * at which one can reject the null hypothesis that the two means are
802     * equal in favor of the two-sided alternative that they are different.
803     * For a one-sided test, divide the returned value by 2.</p>
804     * <p>
805     * The test does not assume that the underlying population variances are
806     * equal  and it uses approximated degrees of freedom computed from the
807     * sample data to compute the p-value.   To perform the test assuming
808     * equal variances, use
809     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
810     * <p>
811     * <strong>Usage Note:</strong><br>
812     * The validity of the p-value depends on the assumptions of the parametric
813     * t-test procedure, as discussed
814     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
815     * here</a></p>
816     * <p>
817     * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
820     * </li></ul>
821     *
822     * @param sampleStats1  StatisticalSummary describing data from the first sample
823     * @param sampleStats2  StatisticalSummary describing data from the second sample
824     * @return p-value for t-test
825     * @throws NullArgumentException if the sample statistics are <code>null</code>
826     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
827     * @throws MaxCountExceededException if an error occurs computing the p-value
828     */
829    public double tTest(final StatisticalSummary sampleStats1,
830                        final StatisticalSummary sampleStats2)
831        throws NullArgumentException, NumberIsTooSmallException,
832        MaxCountExceededException {
833
834        checkSampleData(sampleStats1);
835        checkSampleData(sampleStats2);
836        return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
837                     sampleStats1.getVariance(), sampleStats2.getVariance(),
838                     sampleStats1.getN(), sampleStats2.getN());
839    }
840
841    /**
842     * Returns the <i>observed significance level</i>, or
843     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
844     * comparing the means of the datasets described by two StatisticalSummary
845     * instances, under the hypothesis of equal subpopulation variances. To
846     * perform a test without the equal variances assumption, use
847     * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
848     * <p>
849     * The number returned is the smallest significance level
850     * at which one can reject the null hypothesis that the two means are
851     * equal in favor of the two-sided alternative that they are different.
852     * For a one-sided test, divide the returned value by 2.</p>
853     * <p>
854     * See {@link #homoscedasticT(double[], double[])} for the formula used to
855     * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
856     * the degrees of freedom.</p>
857     * <p>
858     * <strong>Usage Note:</strong><br>
859     * The validity of the p-value depends on the assumptions of the parametric
860     * t-test procedure, as discussed
861     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
862     * </p><p>
863     * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
866     * </li></ul>
867     *
868     * @param sampleStats1  StatisticalSummary describing data from the first sample
869     * @param sampleStats2  StatisticalSummary describing data from the second sample
870     * @return p-value for t-test
871     * @throws NullArgumentException if the sample statistics are <code>null</code>
872     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
873     * @throws MaxCountExceededException if an error occurs computing the p-value
874     */
875    public double homoscedasticTTest(final StatisticalSummary sampleStats1,
876                                     final StatisticalSummary sampleStats2)
877        throws NullArgumentException, NumberIsTooSmallException,
878        MaxCountExceededException {
879
880        checkSampleData(sampleStats1);
881        checkSampleData(sampleStats2);
882        return homoscedasticTTest(sampleStats1.getMean(),
883                                  sampleStats2.getMean(),
884                                  sampleStats1.getVariance(),
885                                  sampleStats2.getVariance(),
886                                  sampleStats1.getN(), sampleStats2.getN());
887    }
888
889    /**
890     * Performs a
891     * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
892     * two-sided t-test</a> evaluating the null hypothesis that
893     * <code>sampleStats1</code> and <code>sampleStats2</code> describe
894     * datasets drawn from populations with the same mean, with significance
895     * level <code>alpha</code>.   This test does not assume that the
896     * subpopulation variances are equal.  To perform the test under the equal
897     * variances assumption, use
898     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
899     * <p>
900     * Returns <code>true</code> iff the null hypothesis that the means are
901     * equal can be rejected with confidence <code>1 - alpha</code>.  To
902     * perform a 1-sided test, use <code>alpha * 2</code></p>
903     * <p>
904     * See {@link #t(double[], double[])} for the formula used to compute the
905     * t-statistic.  Degrees of freedom are approximated using the
906     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
907     * Welch-Satterthwaite approximation.</a></p>
908     * <p>
909     * <strong>Examples:</strong><br><ol>
910     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
     * the 95% level, use
912     * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
913     * </li>
914     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
915     * at the 99% level,  first verify that the measured mean of
916     * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
917     * and then use
918     * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
919     * </li></ol>
920     * <p>
921     * <strong>Usage Note:</strong><br>
922     * The validity of the test depends on the assumptions of the parametric
923     * t-test procedure, as discussed
924     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
925     * here</a></p>
926     * <p>
927     * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
930     * </li>
     * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
932     * </li></ul>
933     *
934     * @param sampleStats1 StatisticalSummary describing sample data values
935     * @param sampleStats2 StatisticalSummary describing sample data values
936     * @param alpha significance level of the test
937     * @return true if the null hypothesis can be rejected with
938     * confidence 1 - alpha
939     * @throws NullArgumentException if the sample statistics are <code>null</code>
940     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
941     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
942     * @throws MaxCountExceededException if an error occurs computing the p-value
943     */
944    public boolean tTest(final StatisticalSummary sampleStats1,
945                         final StatisticalSummary sampleStats2,
946                         final double alpha)
947        throws NullArgumentException, NumberIsTooSmallException,
948        OutOfRangeException, MaxCountExceededException {
949
950        checkSignificanceLevel(alpha);
951        return tTest(sampleStats1, sampleStats2) < alpha;
952    }
953
954    //----------------------------------------------- Protected methods
955
956    /**
957     * Computes approximate degrees of freedom for 2-sample t-test.
958     *
959     * @param v1 first sample variance
960     * @param v2 second sample variance
961     * @param n1 first sample n
962     * @param n2 second sample n
963     * @return approximate degrees of freedom
964     */
965    protected double df(double v1, double v2, double n1, double n2) {
966        return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
967        ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
968                (n2 * n2 * (n2 - 1d)));
969    }
970
971    /**
972     * Computes t test statistic for 1-sample t-test.
973     *
974     * @param m sample mean
975     * @param mu constant to test against
976     * @param v sample variance
977     * @param n sample n
978     * @return t test statistic
979     */
980    protected double t(final double m, final double mu,
981                       final double v, final double n) {
982        return (m - mu) / JdkMath.sqrt(v / n);
983    }
984
985    /**
986     * Computes t test statistic for 2-sample t-test.
987     * <p>
988     * Does not assume that subpopulation variances are equal.</p>
989     *
990     * @param m1 first sample mean
991     * @param m2 second sample mean
992     * @param v1 first sample variance
993     * @param v2 second sample variance
994     * @param n1 first sample n
995     * @param n2 second sample n
996     * @return t test statistic
997     */
998    protected double t(final double m1, final double m2,
999                       final double v1, final double v2,
1000                       final double n1, final double n2)  {
1001        return (m1 - m2) / JdkMath.sqrt((v1 / n1) + (v2 / n2));
1002    }
1003
1004    /**
1005     * Computes t test statistic for 2-sample t-test under the hypothesis
1006     * of equal subpopulation variances.
1007     *
1008     * @param m1 first sample mean
1009     * @param m2 second sample mean
1010     * @param v1 first sample variance
1011     * @param v2 second sample variance
1012     * @param n1 first sample n
1013     * @param n2 second sample n
1014     * @return t test statistic
1015     */
1016    protected double homoscedasticT(final double m1, final double m2,
1017                                    final double v1, final double v2,
1018                                    final double n1, final double n2)  {
1019        final double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1020        return (m1 - m2) / JdkMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1021    }
1022
1023    /**
1024     * Computes p-value for 2-sided, 1-sample t-test.
1025     *
1026     * @param m sample mean
1027     * @param mu constant to test against
1028     * @param v sample variance
1029     * @param n sample n
1030     * @return p-value
1031     * @throws MaxCountExceededException if an error occurs computing the p-value
1032     * @throws MathIllegalArgumentException if n is not greater than 1
1033     */
1034    protected double tTest(final double m, final double mu,
1035                           final double v, final double n)
1036        throws MaxCountExceededException, MathIllegalArgumentException {
1037
1038        final double t = JdkMath.abs(t(m, mu, v, n));
1039        // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1040        final TDistribution distribution = TDistribution.of(n - 1);
1041        return 2.0 * distribution.cumulativeProbability(-t);
1042    }
1043
1044    /**
1045     * Computes p-value for 2-sided, 2-sample t-test.
1046     * <p>
1047     * Does not assume subpopulation variances are equal. Degrees of freedom
1048     * are estimated from the data.</p>
1049     *
1050     * @param m1 first sample mean
1051     * @param m2 second sample mean
1052     * @param v1 first sample variance
1053     * @param v2 second sample variance
1054     * @param n1 first sample n
1055     * @param n2 second sample n
1056     * @return p-value
1057     * @throws MaxCountExceededException if an error occurs computing the p-value
1058     * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1059     * strictly positive
1060     */
1061    protected double tTest(final double m1, final double m2,
1062                           final double v1, final double v2,
1063                           final double n1, final double n2)
1064        throws MaxCountExceededException, NotStrictlyPositiveException {
1065
1066        final double t = JdkMath.abs(t(m1, m2, v1, v2, n1, n2));
1067        final double degreesOfFreedom = df(v1, v2, n1, n2);
1068        // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1069        final TDistribution distribution = TDistribution.of(degreesOfFreedom);
1070        return 2.0 * distribution.cumulativeProbability(-t);
1071    }
1072
1073    /**
1074     * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1075     * of equal subpopulation variances.
1076     * <p>
1077     * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1078     *
1079     * @param m1 first sample mean
1080     * @param m2 second sample mean
1081     * @param v1 first sample variance
1082     * @param v2 second sample variance
1083     * @param n1 first sample n
1084     * @param n2 second sample n
1085     * @return p-value
1086     * @throws MaxCountExceededException if an error occurs computing the p-value
1087     * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1088     * strictly positive
1089     */
1090    protected double homoscedasticTTest(double m1, double m2,
1091                                        double v1, double v2,
1092                                        double n1, double n2)
1093        throws MaxCountExceededException, NotStrictlyPositiveException {
1094
1095        final double t = JdkMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1096        final double degreesOfFreedom = n1 + n2 - 2;
1097        // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1098        final TDistribution distribution = TDistribution.of(degreesOfFreedom);
1099        return 2.0 * distribution.cumulativeProbability(-t);
1100    }
1101
1102    /**
1103     * Check significance level.
1104     *
1105     * @param alpha significance level
1106     * @throws OutOfRangeException if the significance level is out of bounds.
1107     */
1108    private void checkSignificanceLevel(final double alpha)
1109        throws OutOfRangeException {
1110
1111        if (alpha <= 0 || alpha > 0.5) {
1112            throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
1113                                          alpha, 0.0, 0.5);
1114        }
1115    }
1116
1117    /**
1118     * Check sample data.
1119     *
1120     * @param data Sample data.
1121     * @throws NullArgumentException if {@code data} is {@code null}.
1122     * @throws NumberIsTooSmallException if there is not enough sample data.
1123     */
1124    private void checkSampleData(final double[] data)
1125        throws NullArgumentException, NumberIsTooSmallException {
1126
1127        if (data == null) {
1128            throw new NullArgumentException();
1129        }
1130        if (data.length < 2) {
1131            throw new NumberIsTooSmallException(
1132                    LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1133                    data.length, 2, true);
1134        }
1135    }
1136
1137    /**
1138     * Check sample data.
1139     *
1140     * @param stat Statistical summary.
     * @throws NullArgumentException if {@code stat} is {@code null}.
1142     * @throws NumberIsTooSmallException if there is not enough sample data.
1143     */
1144    private void checkSampleData(final StatisticalSummary stat)
1145        throws NullArgumentException, NumberIsTooSmallException {
1146
1147        if (stat == null) {
1148            throw new NullArgumentException();
1149        }
1150        if (stat.getN() < 2) {
1151            throw new NumberIsTooSmallException(
1152                    LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1153                    stat.getN(), 2, true);
1154        }
1155    }
1156}