001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat.inference;
018
019import org.apache.commons.math3.distribution.TDistribution;
020import org.apache.commons.math3.exception.DimensionMismatchException;
021import org.apache.commons.math3.exception.MathIllegalArgumentException;
022import org.apache.commons.math3.exception.MaxCountExceededException;
023import org.apache.commons.math3.exception.NoDataException;
024import org.apache.commons.math3.exception.NotStrictlyPositiveException;
025import org.apache.commons.math3.exception.NullArgumentException;
026import org.apache.commons.math3.exception.NumberIsTooSmallException;
027import org.apache.commons.math3.exception.OutOfRangeException;
028import org.apache.commons.math3.exception.util.LocalizedFormats;
029import org.apache.commons.math3.stat.StatUtils;
030import org.apache.commons.math3.stat.descriptive.StatisticalSummary;
031import org.apache.commons.math3.util.FastMath;
032
033/**
034 * An implementation for Student's t-tests.
035 * <p>
036 * Tests can be:<ul>
037 * <li>One-sample or two-sample</li>
038 * <li>One-sided or two-sided</li>
039 * <li>Paired or unpaired (for two-sample tests)</li>
040 * <li>Homoscedastic (equal variance assumption) or heteroscedastic
041 * (for two sample tests)</li>
042 * <li>Fixed significance level (boolean-valued) or returning p-values.
043 * </li></ul></p>
044 * <p>
 * Test statistics are available for all tests.  Methods including "Test"
 * in their names perform tests, all other methods return t-statistics.  Among
047 * the "Test" methods, <code>double-</code>valued methods return p-values;
048 * <code>boolean-</code>valued methods perform fixed significance level tests.
049 * Significance levels are always specified as numbers between 0 and 0.5
050 * (e.g. tests at the 95% level  use <code>alpha=0.05</code>).</p>
051 * <p>
052 * Input to tests can be either <code>double[]</code> arrays or
053 * {@link StatisticalSummary} instances.</p><p>
054 * Uses commons-math {@link org.apache.commons.math3.distribution.TDistribution}
055 * implementation to estimate exact p-values.</p>
056 *
057 */
058public class TTest {
059    /**
060     * Computes a paired, 2-sample t-statistic based on the data in the input
061     * arrays.  The t-statistic returned is equivalent to what would be returned by
062     * computing the one-sample t-statistic {@link #t(double, double[])}, with
063     * <code>mu = 0</code> and the sample array consisting of the (signed)
064     * differences between corresponding entries in <code>sample1</code> and
065     * <code>sample2.</code>
066     * <p>
067     * <strong>Preconditions</strong>: <ul>
068     * <li>The input arrays must have the same length and their common length
069     * must be at least 2.
070     * </li></ul></p>
071     *
072     * @param sample1 array of sample data values
073     * @param sample2 array of sample data values
074     * @return t statistic
075     * @throws NullArgumentException if the arrays are <code>null</code>
076     * @throws NoDataException if the arrays are empty
077     * @throws DimensionMismatchException if the length of the arrays is not equal
078     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
079     */
080    public double pairedT(final double[] sample1, final double[] sample2)
081        throws NullArgumentException, NoDataException,
082        DimensionMismatchException, NumberIsTooSmallException {
083
084        checkSampleData(sample1);
085        checkSampleData(sample2);
086        double meanDifference = StatUtils.meanDifference(sample1, sample2);
087        return t(meanDifference, 0,
088                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
089                 sample1.length);
090
091    }
092
093    /**
094     * Returns the <i>observed significance level</i>, or
095     * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
096     * based on the data in the input arrays.
097     * <p>
098     * The number returned is the smallest significance level
099     * at which one can reject the null hypothesis that the mean of the paired
100     * differences is 0 in favor of the two-sided alternative that the mean paired
101     * difference is not equal to 0. For a one-sided test, divide the returned
102     * value by 2.</p>
103     * <p>
104     * This test is equivalent to a one-sample t-test computed using
105     * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
106     * array consisting of the signed differences between corresponding elements of
107     * <code>sample1</code> and <code>sample2.</code></p>
108     * <p>
109     * <strong>Usage Note:</strong><br>
110     * The validity of the p-value depends on the assumptions of the parametric
111     * t-test procedure, as discussed
112     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
113     * here</a></p>
114     * <p>
115     * <strong>Preconditions</strong>: <ul>
116     * <li>The input array lengths must be the same and their common length must
117     * be at least 2.
118     * </li></ul></p>
119     *
120     * @param sample1 array of sample data values
121     * @param sample2 array of sample data values
122     * @return p-value for t-test
123     * @throws NullArgumentException if the arrays are <code>null</code>
124     * @throws NoDataException if the arrays are empty
125     * @throws DimensionMismatchException if the length of the arrays is not equal
126     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
127     * @throws MaxCountExceededException if an error occurs computing the p-value
128     */
129    public double pairedTTest(final double[] sample1, final double[] sample2)
130        throws NullArgumentException, NoDataException, DimensionMismatchException,
131        NumberIsTooSmallException, MaxCountExceededException {
132
133        double meanDifference = StatUtils.meanDifference(sample1, sample2);
134        return tTest(meanDifference, 0,
135                StatUtils.varianceDifference(sample1, sample2, meanDifference),
136                sample1.length);
137
138    }
139
140    /**
141     * Performs a paired t-test evaluating the null hypothesis that the
142     * mean of the paired differences between <code>sample1</code> and
143     * <code>sample2</code> is 0 in favor of the two-sided alternative that the
144     * mean paired difference is not equal to 0, with significance level
145     * <code>alpha</code>.
146     * <p>
147     * Returns <code>true</code> iff the null hypothesis can be rejected with
148     * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
149     * <code>alpha * 2</code></p>
150     * <p>
151     * <strong>Usage Note:</strong><br>
152     * The validity of the test depends on the assumptions of the parametric
153     * t-test procedure, as discussed
154     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
155     * here</a></p>
156     * <p>
157     * <strong>Preconditions</strong>: <ul>
158     * <li>The input array lengths must be the same and their common length
159     * must be at least 2.
160     * </li>
161     * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
162     * </li></ul></p>
163     *
164     * @param sample1 array of sample data values
165     * @param sample2 array of sample data values
166     * @param alpha significance level of the test
167     * @return true if the null hypothesis can be rejected with
168     * confidence 1 - alpha
169     * @throws NullArgumentException if the arrays are <code>null</code>
170     * @throws NoDataException if the arrays are empty
171     * @throws DimensionMismatchException if the length of the arrays is not equal
172     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
173     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
174     * @throws MaxCountExceededException if an error occurs computing the p-value
175     */
176    public boolean pairedTTest(final double[] sample1, final double[] sample2,
177                               final double alpha)
178        throws NullArgumentException, NoDataException, DimensionMismatchException,
179        NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {
180
181        checkSignificanceLevel(alpha);
182        return pairedTTest(sample1, sample2) < alpha;
183
184    }
185
186    /**
187     * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
188     * t statistic </a> given observed values and a comparison constant.
189     * <p>
190     * This statistic can be used to perform a one sample t-test for the mean.
191     * </p><p>
192     * <strong>Preconditions</strong>: <ul>
193     * <li>The observed array length must be at least 2.
194     * </li></ul></p>
195     *
196     * @param mu comparison constant
197     * @param observed array of values
198     * @return t statistic
199     * @throws NullArgumentException if <code>observed</code> is <code>null</code>
200     * @throws NumberIsTooSmallException if the length of <code>observed</code> is &lt; 2
201     */
202    public double t(final double mu, final double[] observed)
203        throws NullArgumentException, NumberIsTooSmallException {
204
205        checkSampleData(observed);
206        // No try-catch or advertised exception because args have just been checked
207        return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
208                observed.length);
209
210    }
211
212    /**
213     * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
214     * t statistic </a> to use in comparing the mean of the dataset described by
215     * <code>sampleStats</code> to <code>mu</code>.
216     * <p>
217     * This statistic can be used to perform a one sample t-test for the mean.
218     * </p><p>
219     * <strong>Preconditions</strong>: <ul>
220     * <li><code>observed.getN() &ge; 2</code>.
221     * </li></ul></p>
222     *
223     * @param mu comparison constant
224     * @param sampleStats DescriptiveStatistics holding sample summary statitstics
225     * @return t statistic
226     * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
227     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
228     */
229    public double t(final double mu, final StatisticalSummary sampleStats)
230        throws NullArgumentException, NumberIsTooSmallException {
231
232        checkSampleData(sampleStats);
233        return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
234                 sampleStats.getN());
235
236    }
237
238    /**
239     * Computes a 2-sample t statistic,  under the hypothesis of equal
240     * subpopulation variances.  To compute a t-statistic without the
241     * equal variances hypothesis, use {@link #t(double[], double[])}.
242     * <p>
243     * This statistic can be used to perform a (homoscedastic) two-sample
244     * t-test to compare sample means.</p>
245     * <p>
246     * The t-statistic is</p>
247     * <p>
248     * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
249     * </p><p>
250     * where <strong><code>n1</code></strong> is the size of first sample;
251     * <strong><code> n2</code></strong> is the size of second sample;
252     * <strong><code> m1</code></strong> is the mean of first sample;
253     * <strong><code> m2</code></strong> is the mean of second sample</li>
254     * </ul>
255     * and <strong><code>var</code></strong> is the pooled variance estimate:
256     * </p><p>
257     * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
258     * </p><p>
259     * with <strong><code>var1</code></strong> the variance of the first sample and
260     * <strong><code>var2</code></strong> the variance of the second sample.
261     * </p><p>
262     * <strong>Preconditions</strong>: <ul>
263     * <li>The observed array lengths must both be at least 2.
264     * </li></ul></p>
265     *
266     * @param sample1 array of sample data values
267     * @param sample2 array of sample data values
268     * @return t statistic
269     * @throws NullArgumentException if the arrays are <code>null</code>
270     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
271     */
272    public double homoscedasticT(final double[] sample1, final double[] sample2)
273        throws NullArgumentException, NumberIsTooSmallException {
274
275        checkSampleData(sample1);
276        checkSampleData(sample2);
277        // No try-catch or advertised exception because args have just been checked
278        return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
279                              StatUtils.variance(sample1), StatUtils.variance(sample2),
280                              sample1.length, sample2.length);
281
282    }
283
284    /**
285     * Computes a 2-sample t statistic, without the hypothesis of equal
286     * subpopulation variances.  To compute a t-statistic assuming equal
287     * variances, use {@link #homoscedasticT(double[], double[])}.
288     * <p>
289     * This statistic can be used to perform a two-sample t-test to compare
290     * sample means.</p>
291     * <p>
292     * The t-statistic is</p>
293     * <p>
294     * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
295     * </p><p>
296     *  where <strong><code>n1</code></strong> is the size of the first sample
297     * <strong><code> n2</code></strong> is the size of the second sample;
298     * <strong><code> m1</code></strong> is the mean of the first sample;
299     * <strong><code> m2</code></strong> is the mean of the second sample;
300     * <strong><code> var1</code></strong> is the variance of the first sample;
301     * <strong><code> var2</code></strong> is the variance of the second sample;
302     * </p><p>
303     * <strong>Preconditions</strong>: <ul>
304     * <li>The observed array lengths must both be at least 2.
305     * </li></ul></p>
306     *
307     * @param sample1 array of sample data values
308     * @param sample2 array of sample data values
309     * @return t statistic
310     * @throws NullArgumentException if the arrays are <code>null</code>
311     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
312     */
313    public double t(final double[] sample1, final double[] sample2)
314        throws NullArgumentException, NumberIsTooSmallException {
315
316        checkSampleData(sample1);
317        checkSampleData(sample2);
318        // No try-catch or advertised exception because args have just been checked
319        return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
320                 StatUtils.variance(sample1), StatUtils.variance(sample2),
321                 sample1.length, sample2.length);
322
323    }
324
325    /**
326     * Computes a 2-sample t statistic </a>, comparing the means of the datasets
327     * described by two {@link StatisticalSummary} instances, without the
328     * assumption of equal subpopulation variances.  Use
329     * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
330     * compute a t-statistic under the equal variances assumption.
331     * <p>
332     * This statistic can be used to perform a two-sample t-test to compare
333     * sample means.</p>
334     * <p>
335      * The returned  t-statistic is</p>
336     * <p>
337     * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
338     * </p><p>
339     * where <strong><code>n1</code></strong> is the size of the first sample;
340     * <strong><code> n2</code></strong> is the size of the second sample;
341     * <strong><code> m1</code></strong> is the mean of the first sample;
342     * <strong><code> m2</code></strong> is the mean of the second sample
343     * <strong><code> var1</code></strong> is the variance of the first sample;
344     * <strong><code> var2</code></strong> is the variance of the second sample
345     * </p><p>
346     * <strong>Preconditions</strong>: <ul>
347     * <li>The datasets described by the two Univariates must each contain
348     * at least 2 observations.
349     * </li></ul></p>
350     *
351     * @param sampleStats1 StatisticalSummary describing data from the first sample
352     * @param sampleStats2 StatisticalSummary describing data from the second sample
353     * @return t statistic
354     * @throws NullArgumentException if the sample statistics are <code>null</code>
355     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
356     */
357    public double t(final StatisticalSummary sampleStats1,
358                    final StatisticalSummary sampleStats2)
359        throws NullArgumentException, NumberIsTooSmallException {
360
361        checkSampleData(sampleStats1);
362        checkSampleData(sampleStats2);
363        return t(sampleStats1.getMean(), sampleStats2.getMean(),
364                 sampleStats1.getVariance(), sampleStats2.getVariance(),
365                 sampleStats1.getN(), sampleStats2.getN());
366
367    }
368
369    /**
370     * Computes a 2-sample t statistic, comparing the means of the datasets
371     * described by two {@link StatisticalSummary} instances, under the
372     * assumption of equal subpopulation variances.  To compute a t-statistic
373     * without the equal variances assumption, use
374     * {@link #t(StatisticalSummary, StatisticalSummary)}.
375     * <p>
376     * This statistic can be used to perform a (homoscedastic) two-sample
377     * t-test to compare sample means.</p>
378     * <p>
379     * The t-statistic returned is</p>
380     * <p>
381     * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
382     * </p><p>
383     * where <strong><code>n1</code></strong> is the size of first sample;
384     * <strong><code> n2</code></strong> is the size of second sample;
385     * <strong><code> m1</code></strong> is the mean of first sample;
386     * <strong><code> m2</code></strong> is the mean of second sample
387     * and <strong><code>var</code></strong> is the pooled variance estimate:
388     * </p><p>
389     * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
390     * </p><p>
391     * with <strong><code>var1</code></strong> the variance of the first sample and
392     * <strong><code>var2</code></strong> the variance of the second sample.
393     * </p><p>
394     * <strong>Preconditions</strong>: <ul>
395     * <li>The datasets described by the two Univariates must each contain
396     * at least 2 observations.
397     * </li></ul></p>
398     *
399     * @param sampleStats1 StatisticalSummary describing data from the first sample
400     * @param sampleStats2 StatisticalSummary describing data from the second sample
401     * @return t statistic
402     * @throws NullArgumentException if the sample statistics are <code>null</code>
403     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
404     */
405    public double homoscedasticT(final StatisticalSummary sampleStats1,
406                                 final StatisticalSummary sampleStats2)
407        throws NullArgumentException, NumberIsTooSmallException {
408
409        checkSampleData(sampleStats1);
410        checkSampleData(sampleStats2);
411        return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
412                              sampleStats1.getVariance(), sampleStats2.getVariance(),
413                              sampleStats1.getN(), sampleStats2.getN());
414
415    }
416
417    /**
418     * Returns the <i>observed significance level</i>, or
419     * <i>p-value</i>, associated with a one-sample, two-tailed t-test
420     * comparing the mean of the input array with the constant <code>mu</code>.
421     * <p>
422     * The number returned is the smallest significance level
423     * at which one can reject the null hypothesis that the mean equals
424     * <code>mu</code> in favor of the two-sided alternative that the mean
425     * is different from <code>mu</code>. For a one-sided test, divide the
426     * returned value by 2.</p>
427     * <p>
428     * <strong>Usage Note:</strong><br>
429     * The validity of the test depends on the assumptions of the parametric
430     * t-test procedure, as discussed
431     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
432     * </p><p>
433     * <strong>Preconditions</strong>: <ul>
434     * <li>The observed array length must be at least 2.
435     * </li></ul></p>
436     *
437     * @param mu constant value to compare sample mean against
438     * @param sample array of sample data values
439     * @return p-value
440     * @throws NullArgumentException if the sample array is <code>null</code>
441     * @throws NumberIsTooSmallException if the length of the array is &lt; 2
442     * @throws MaxCountExceededException if an error occurs computing the p-value
443     */
444    public double tTest(final double mu, final double[] sample)
445        throws NullArgumentException, NumberIsTooSmallException,
446        MaxCountExceededException {
447
448        checkSampleData(sample);
449        // No try-catch or advertised exception because args have just been checked
450        return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
451                     sample.length);
452
453    }
454
455    /**
456     * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
457     * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
458     * which <code>sample</code> is drawn equals <code>mu</code>.
459     * <p>
460     * Returns <code>true</code> iff the null hypothesis can be
461     * rejected with confidence <code>1 - alpha</code>.  To
462     * perform a 1-sided test, use <code>alpha * 2</code></p>
463     * <p>
464     * <strong>Examples:</strong><br><ol>
465     * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
466     * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
467     * </li>
468     * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
469     * at the 99% level, first verify that the measured sample mean is less
470     * than <code>mu</code> and then use
471     * <br><code>tTest(mu, sample, 0.02) </code>
472     * </li></ol></p>
473     * <p>
474     * <strong>Usage Note:</strong><br>
475     * The validity of the test depends on the assumptions of the one-sample
476     * parametric t-test procedure, as discussed
477     * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
478     * </p><p>
479     * <strong>Preconditions</strong>: <ul>
480     * <li>The observed array length must be at least 2.
481     * </li></ul></p>
482     *
483     * @param mu constant value to compare sample mean against
484     * @param sample array of sample data values
485     * @param alpha significance level of the test
486     * @return p-value
487     * @throws NullArgumentException if the sample array is <code>null</code>
488     * @throws NumberIsTooSmallException if the length of the array is &lt; 2
489     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
490     * @throws MaxCountExceededException if an error computing the p-value
491     */
492    public boolean tTest(final double mu, final double[] sample, final double alpha)
493        throws NullArgumentException, NumberIsTooSmallException,
494        OutOfRangeException, MaxCountExceededException {
495
496        checkSignificanceLevel(alpha);
497        return tTest(mu, sample) < alpha;
498
499    }
500
501    /**
502     * Returns the <i>observed significance level</i>, or
503     * <i>p-value</i>, associated with a one-sample, two-tailed t-test
504     * comparing the mean of the dataset described by <code>sampleStats</code>
505     * with the constant <code>mu</code>.
506     * <p>
507     * The number returned is the smallest significance level
508     * at which one can reject the null hypothesis that the mean equals
509     * <code>mu</code> in favor of the two-sided alternative that the mean
510     * is different from <code>mu</code>. For a one-sided test, divide the
511     * returned value by 2.</p>
512     * <p>
513     * <strong>Usage Note:</strong><br>
514     * The validity of the test depends on the assumptions of the parametric
515     * t-test procedure, as discussed
516     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
517     * here</a></p>
518     * <p>
519     * <strong>Preconditions</strong>: <ul>
520     * <li>The sample must contain at least 2 observations.
521     * </li></ul></p>
522     *
523     * @param mu constant value to compare sample mean against
524     * @param sampleStats StatisticalSummary describing sample data
525     * @return p-value
526     * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
527     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
528     * @throws MaxCountExceededException if an error occurs computing the p-value
529     */
530    public double tTest(final double mu, final StatisticalSummary sampleStats)
531        throws NullArgumentException, NumberIsTooSmallException,
532        MaxCountExceededException {
533
534        checkSampleData(sampleStats);
535        return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
536                     sampleStats.getN());
537
538    }
539
540    /**
541     * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
542     * two-sided t-test</a> evaluating the null hypothesis that the mean of the
543     * population from which the dataset described by <code>stats</code> is
544     * drawn equals <code>mu</code>.
545     * <p>
546     * Returns <code>true</code> iff the null hypothesis can be rejected with
547     * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
548     * <code>alpha * 2.</code></p>
549     * <p>
550     * <strong>Examples:</strong><br><ol>
551     * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
552     * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
553     * </li>
554     * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
555     * at the 99% level, first verify that the measured sample mean is less
556     * than <code>mu</code> and then use
557     * <br><code>tTest(mu, sampleStats, 0.02) </code>
558     * </li></ol></p>
559     * <p>
560     * <strong>Usage Note:</strong><br>
561     * The validity of the test depends on the assumptions of the one-sample
562     * parametric t-test procedure, as discussed
563     * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
564     * </p><p>
565     * <strong>Preconditions</strong>: <ul>
566     * <li>The sample must include at least 2 observations.
567     * </li></ul></p>
568     *
569     * @param mu constant value to compare sample mean against
570     * @param sampleStats StatisticalSummary describing sample data values
571     * @param alpha significance level of the test
572     * @return p-value
573     * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
574     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
575     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
576     * @throws MaxCountExceededException if an error occurs computing the p-value
577     */
578    public boolean tTest(final double mu, final StatisticalSummary sampleStats,
579                         final double alpha)
580    throws NullArgumentException, NumberIsTooSmallException,
581    OutOfRangeException, MaxCountExceededException {
582
583        checkSignificanceLevel(alpha);
584        return tTest(mu, sampleStats) < alpha;
585
586    }
587
588    /**
589     * Returns the <i>observed significance level</i>, or
590     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
591     * comparing the means of the input arrays.
592     * <p>
593     * The number returned is the smallest significance level
594     * at which one can reject the null hypothesis that the two means are
595     * equal in favor of the two-sided alternative that they are different.
596     * For a one-sided test, divide the returned value by 2.</p>
597     * <p>
598     * The test does not assume that the underlying popuation variances are
599     * equal  and it uses approximated degrees of freedom computed from the
600     * sample data to compute the p-value.  The t-statistic used is as defined in
601     * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
602     * to the degrees of freedom is used,
603     * as described
604     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
605     * here.</a>  To perform the test under the assumption of equal subpopulation
606     * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
607     * <p>
608     * <strong>Usage Note:</strong><br>
609     * The validity of the p-value depends on the assumptions of the parametric
610     * t-test procedure, as discussed
611     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
612     * here</a></p>
613     * <p>
614     * <strong>Preconditions</strong>: <ul>
615     * <li>The observed array lengths must both be at least 2.
616     * </li></ul></p>
617     *
618     * @param sample1 array of sample data values
619     * @param sample2 array of sample data values
620     * @return p-value for t-test
621     * @throws NullArgumentException if the arrays are <code>null</code>
622     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
623     * @throws MaxCountExceededException if an error occurs computing the p-value
624     */
625    public double tTest(final double[] sample1, final double[] sample2)
626        throws NullArgumentException, NumberIsTooSmallException,
627        MaxCountExceededException {
628
629        checkSampleData(sample1);
630        checkSampleData(sample2);
631        // No try-catch or advertised exception because args have just been checked
632        return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
633                     StatUtils.variance(sample1), StatUtils.variance(sample2),
634                     sample1.length, sample2.length);
635
636    }
637
638    /**
639     * Returns the <i>observed significance level</i>, or
640     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
641     * comparing the means of the input arrays, under the assumption that
642     * the two samples are drawn from subpopulations with equal variances.
643     * To perform the test without the equal variances assumption, use
644     * {@link #tTest(double[], double[])}.</p>
645     * <p>
646     * The number returned is the smallest significance level
647     * at which one can reject the null hypothesis that the two means are
648     * equal in favor of the two-sided alternative that they are different.
649     * For a one-sided test, divide the returned value by 2.</p>
650     * <p>
651     * A pooled variance estimate is used to compute the t-statistic.  See
652     * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
653     * minus 2 is used as the degrees of freedom.</p>
654     * <p>
655     * <strong>Usage Note:</strong><br>
656     * The validity of the p-value depends on the assumptions of the parametric
657     * t-test procedure, as discussed
658     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
659     * here</a></p>
660     * <p>
661     * <strong>Preconditions</strong>: <ul>
662     * <li>The observed array lengths must both be at least 2.
663     * </li></ul></p>
664     *
665     * @param sample1 array of sample data values
666     * @param sample2 array of sample data values
667     * @return p-value for t-test
668     * @throws NullArgumentException if the arrays are <code>null</code>
669     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
670     * @throws MaxCountExceededException if an error occurs computing the p-value
671     */
672    public double homoscedasticTTest(final double[] sample1, final double[] sample2)
673        throws NullArgumentException, NumberIsTooSmallException,
674        MaxCountExceededException {
675
676        checkSampleData(sample1);
677        checkSampleData(sample2);
678        // No try-catch or advertised exception because args have just been checked
679        return homoscedasticTTest(StatUtils.mean(sample1),
680                                  StatUtils.mean(sample2),
681                                  StatUtils.variance(sample1),
682                                  StatUtils.variance(sample2),
683                                  sample1.length, sample2.length);
684
685    }
686
687    /**
688     * Performs a
689     * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
690     * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
691     * and <code>sample2</code> are drawn from populations with the same mean,
692     * with significance level <code>alpha</code>.  This test does not assume
693     * that the subpopulation variances are equal.  To perform the test assuming
694     * equal variances, use
695     * {@link #homoscedasticTTest(double[], double[], double)}.
696     * <p>
697     * Returns <code>true</code> iff the null hypothesis that the means are
698     * equal can be rejected with confidence <code>1 - alpha</code>.  To
699     * perform a 1-sided test, use <code>alpha * 2</code></p>
700     * <p>
701     * See {@link #t(double[], double[])} for the formula used to compute the
702     * t-statistic.  Degrees of freedom are approximated using the
703     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
704     * Welch-Satterthwaite approximation.</a></p>
705     * <p>
706     * <strong>Examples:</strong><br><ol>
707     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
708     * the 95% level,  use
709     * <br><code>tTest(sample1, sample2, 0.05). </code>
710     * </li>
711     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>,
712     * at the 99% level, first verify that the measured  mean of <code>sample 1</code>
713     * is less than the mean of <code>sample 2</code> and then use
714     * <br><code>tTest(sample1, sample2, 0.02) </code>
715     * </li></ol></p>
716     * <p>
717     * <strong>Usage Note:</strong><br>
718     * The validity of the test depends on the assumptions of the parametric
719     * t-test procedure, as discussed
720     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
721     * here</a></p>
722     * <p>
723     * <strong>Preconditions</strong>: <ul>
724     * <li>The observed array lengths must both be at least 2.
725     * </li>
726     * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
727     * </li></ul></p>
728     *
729     * @param sample1 array of sample data values
730     * @param sample2 array of sample data values
731     * @param alpha significance level of the test
732     * @return true if the null hypothesis can be rejected with
733     * confidence 1 - alpha
734     * @throws NullArgumentException if the arrays are <code>null</code>
735     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
736     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
737     * @throws MaxCountExceededException if an error occurs computing the p-value
738     */
739    public boolean tTest(final double[] sample1, final double[] sample2,
740                         final double alpha)
741        throws NullArgumentException, NumberIsTooSmallException,
742        OutOfRangeException, MaxCountExceededException {
743
744        checkSignificanceLevel(alpha);
745        return tTest(sample1, sample2) < alpha;
746
747    }
748
749    /**
750     * Performs a
751     * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
752     * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
753     * and <code>sample2</code> are drawn from populations with the same mean,
754     * with significance level <code>alpha</code>,  assuming that the
755     * subpopulation variances are equal.  Use
756     * {@link #tTest(double[], double[], double)} to perform the test without
757     * the assumption of equal variances.
758     * <p>
759     * Returns <code>true</code> iff the null hypothesis that the means are
760     * equal can be rejected with confidence <code>1 - alpha</code>.  To
761     * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
762     * without the assumption of equal subpopulation variances, use
763     * {@link #tTest(double[], double[], double)}.</p>
764     * <p>
765     * A pooled variance estimate is used to compute the t-statistic. See
766     * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the sample
767     * sizes minus 2 is used as the degrees of freedom.</p>
768     * <p>
769     * <strong>Examples:</strong><br><ol>
770     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
771     * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
772     * </li>
773     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2, </code>
774     * at the 99% level, first verify that the measured mean of
775     * <code>sample 1</code> is less than the mean of <code>sample 2</code>
776     * and then use
777     * <br><code>tTest(sample1, sample2, 0.02) </code>
778     * </li></ol></p>
779     * <p>
780     * <strong>Usage Note:</strong><br>
781     * The validity of the test depends on the assumptions of the parametric
782     * t-test procedure, as discussed
783     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
784     * here</a></p>
785     * <p>
786     * <strong>Preconditions</strong>: <ul>
787     * <li>The observed array lengths must both be at least 2.
788     * </li>
789     * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
790     * </li></ul></p>
791     *
792     * @param sample1 array of sample data values
793     * @param sample2 array of sample data values
794     * @param alpha significance level of the test
795     * @return true if the null hypothesis can be rejected with
796     * confidence 1 - alpha
797     * @throws NullArgumentException if the arrays are <code>null</code>
798     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
799     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
800     * @throws MaxCountExceededException if an error occurs computing the p-value
801     */
802    public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
803                                      final double alpha)
804        throws NullArgumentException, NumberIsTooSmallException,
805        OutOfRangeException, MaxCountExceededException {
806
807        checkSignificanceLevel(alpha);
808        return homoscedasticTTest(sample1, sample2) < alpha;
809
810    }
811
812    /**
813     * Returns the <i>observed significance level</i>, or
814     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
815     * comparing the means of the datasets described by two StatisticalSummary
816     * instances.
817     * <p>
818     * The number returned is the smallest significance level
819     * at which one can reject the null hypothesis that the two means are
820     * equal in favor of the two-sided alternative that they are different.
821     * For a one-sided test, divide the returned value by 2.</p>
822     * <p>
823     * The test does not assume that the underlying population variances are
824     * equal  and it uses approximated degrees of freedom computed from the
825     * sample data to compute the p-value.   To perform the test assuming
826     * equal variances, use
827     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
828     * <p>
829     * <strong>Usage Note:</strong><br>
830     * The validity of the p-value depends on the assumptions of the parametric
831     * t-test procedure, as discussed
832     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
833     * here</a></p>
834     * <p>
835     * <strong>Preconditions</strong>: <ul>
836     * <li>The datasets described by the two StatisticalSummary instances must
837     * each contain at least 2 observations.
838     * </li></ul></p>
839     *
840     * @param sampleStats1  StatisticalSummary describing data from the first sample
841     * @param sampleStats2  StatisticalSummary describing data from the second sample
842     * @return p-value for t-test
843     * @throws NullArgumentException if the sample statistics are <code>null</code>
844     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
845     * @throws MaxCountExceededException if an error occurs computing the p-value
846     */
847    public double tTest(final StatisticalSummary sampleStats1,
848                        final StatisticalSummary sampleStats2)
849        throws NullArgumentException, NumberIsTooSmallException,
850        MaxCountExceededException {
851
852        checkSampleData(sampleStats1);
853        checkSampleData(sampleStats2);
854        return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
855                     sampleStats1.getVariance(), sampleStats2.getVariance(),
856                     sampleStats1.getN(), sampleStats2.getN());
857
858    }
859
860    /**
861     * Returns the <i>observed significance level</i>, or
862     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
863     * comparing the means of the datasets described by two StatisticalSummary
864     * instances, under the hypothesis of equal subpopulation variances. To
865     * perform a test without the equal variances assumption, use
866     * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
867     * <p>
868     * The number returned is the smallest significance level
869     * at which one can reject the null hypothesis that the two means are
870     * equal in favor of the two-sided alternative that they are different.
871     * For a one-sided test, divide the returned value by 2.</p>
872     * <p>
873     * See {@link #homoscedasticT(double[], double[])} for the formula used to
874     * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
875     * the degrees of freedom.</p>
876     * <p>
877     * <strong>Usage Note:</strong><br>
878     * The validity of the p-value depends on the assumptions of the parametric
879     * t-test procedure, as discussed
880     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
881     * </p><p>
882     * <strong>Preconditions</strong>: <ul>
883     * <li>The datasets described by the two StatisticalSummary instances must
884     * each contain at least 2 observations.
885     * </li></ul></p>
886     *
887     * @param sampleStats1  StatisticalSummary describing data from the first sample
888     * @param sampleStats2  StatisticalSummary describing data from the second sample
889     * @return p-value for t-test
890     * @throws NullArgumentException if the sample statistics are <code>null</code>
891     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
892     * @throws MaxCountExceededException if an error occurs computing the p-value
893     */
894    public double homoscedasticTTest(final StatisticalSummary sampleStats1,
895                                     final StatisticalSummary sampleStats2)
896        throws NullArgumentException, NumberIsTooSmallException,
897        MaxCountExceededException {
898
899        checkSampleData(sampleStats1);
900        checkSampleData(sampleStats2);
901        return homoscedasticTTest(sampleStats1.getMean(),
902                                  sampleStats2.getMean(),
903                                  sampleStats1.getVariance(),
904                                  sampleStats2.getVariance(),
905                                  sampleStats1.getN(), sampleStats2.getN());
906
907    }
908
909    /**
910     * Performs a
911     * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
912     * two-sided t-test</a> evaluating the null hypothesis that
913     * <code>sampleStats1</code> and <code>sampleStats2</code> describe
914     * datasets drawn from populations with the same mean, with significance
915     * level <code>alpha</code>.   This test does not assume that the
916     * subpopulation variances are equal.  To perform the test under the equal
917     * variances assumption, use
918     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
919     * <p>
920     * Returns <code>true</code> iff the null hypothesis that the means are
921     * equal can be rejected with confidence <code>1 - alpha</code>.  To
922     * perform a 1-sided test, use <code>alpha * 2</code></p>
923     * <p>
924     * See {@link #t(double[], double[])} for the formula used to compute the
925     * t-statistic.  Degrees of freedom are approximated using the
926     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
927     * Welch-Satterthwaite approximation.</a></p>
928     * <p>
929     * <strong>Examples:</strong><br><ol>
930     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
931     * the 95% level, use
932     * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
933     * </li>
934     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
935     * at the 99% level,  first verify that the measured mean of
936     * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
937     * and then use
938     * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
939     * </li></ol></p>
940     * <p>
941     * <strong>Usage Note:</strong><br>
942     * The validity of the test depends on the assumptions of the parametric
943     * t-test procedure, as discussed
944     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
945     * here</a></p>
946     * <p>
947     * <strong>Preconditions</strong>: <ul>
948     * <li>The datasets described by the two StatisticalSummary instances must
949     * each contain at least 2 observations.
950     * </li>
951     * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
952     * </li></ul></p>
953     *
954     * @param sampleStats1 StatisticalSummary describing sample data values
955     * @param sampleStats2 StatisticalSummary describing sample data values
956     * @param alpha significance level of the test
957     * @return true if the null hypothesis can be rejected with
958     * confidence 1 - alpha
959     * @throws NullArgumentException if the sample statistics are <code>null</code>
960     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
961     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
962     * @throws MaxCountExceededException if an error occurs computing the p-value
963     */
964    public boolean tTest(final StatisticalSummary sampleStats1,
965                         final StatisticalSummary sampleStats2,
966                         final double alpha)
967        throws NullArgumentException, NumberIsTooSmallException,
968        OutOfRangeException, MaxCountExceededException {
969
970        checkSignificanceLevel(alpha);
971        return tTest(sampleStats1, sampleStats2) < alpha;
972
973    }
974
975    //----------------------------------------------- Protected methods
976
977    /**
978     * Computes approximate degrees of freedom for 2-sample t-test.
979     *
980     * @param v1 first sample variance
981     * @param v2 second sample variance
982     * @param n1 first sample n
983     * @param n2 second sample n
984     * @return approximate degrees of freedom
985     */
986    protected double df(double v1, double v2, double n1, double n2) {
987        return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
988        ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
989                (n2 * n2 * (n2 - 1d)));
990    }
991
992    /**
993     * Computes t test statistic for 1-sample t-test.
994     *
995     * @param m sample mean
996     * @param mu constant to test against
997     * @param v sample variance
998     * @param n sample n
999     * @return t test statistic
1000     */
1001    protected double t(final double m, final double mu,
1002                       final double v, final double n) {
1003        return (m - mu) / FastMath.sqrt(v / n);
1004    }
1005
1006    /**
1007     * Computes t test statistic for 2-sample t-test.
1008     * <p>
1009     * Does not assume that subpopulation variances are equal.</p>
1010     *
1011     * @param m1 first sample mean
1012     * @param m2 second sample mean
1013     * @param v1 first sample variance
1014     * @param v2 second sample variance
1015     * @param n1 first sample n
1016     * @param n2 second sample n
1017     * @return t test statistic
1018     */
1019    protected double t(final double m1, final double m2,
1020                       final double v1, final double v2,
1021                       final double n1, final double n2)  {
1022        return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
1023    }
1024
1025    /**
1026     * Computes t test statistic for 2-sample t-test under the hypothesis
1027     * of equal subpopulation variances.
1028     *
1029     * @param m1 first sample mean
1030     * @param m2 second sample mean
1031     * @param v1 first sample variance
1032     * @param v2 second sample variance
1033     * @param n1 first sample n
1034     * @param n2 second sample n
1035     * @return t test statistic
1036     */
1037    protected double homoscedasticT(final double m1, final double m2,
1038                                    final double v1, final double v2,
1039                                    final double n1, final double n2)  {
1040        final double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1041        return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1042    }
1043
1044    /**
1045     * Computes p-value for 2-sided, 1-sample t-test.
1046     *
1047     * @param m sample mean
1048     * @param mu constant to test against
1049     * @param v sample variance
1050     * @param n sample n
1051     * @return p-value
1052     * @throws MaxCountExceededException if an error occurs computing the p-value
1053     * @throws MathIllegalArgumentException if n is not greater than 1
1054     */
1055    protected double tTest(final double m, final double mu,
1056                           final double v, final double n)
1057        throws MaxCountExceededException, MathIllegalArgumentException {
1058
1059        final double t = FastMath.abs(t(m, mu, v, n));
1060        // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1061        final TDistribution distribution = new TDistribution(null, n - 1);
1062        return 2.0 * distribution.cumulativeProbability(-t);
1063
1064    }
1065
1066    /**
1067     * Computes p-value for 2-sided, 2-sample t-test.
1068     * <p>
1069     * Does not assume subpopulation variances are equal. Degrees of freedom
1070     * are estimated from the data.</p>
1071     *
1072     * @param m1 first sample mean
1073     * @param m2 second sample mean
1074     * @param v1 first sample variance
1075     * @param v2 second sample variance
1076     * @param n1 first sample n
1077     * @param n2 second sample n
1078     * @return p-value
1079     * @throws MaxCountExceededException if an error occurs computing the p-value
1080     * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1081     * strictly positive
1082     */
1083    protected double tTest(final double m1, final double m2,
1084                           final double v1, final double v2,
1085                           final double n1, final double n2)
1086        throws MaxCountExceededException, NotStrictlyPositiveException {
1087
1088        final double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
1089        final double degreesOfFreedom = df(v1, v2, n1, n2);
1090        // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1091        final TDistribution distribution = new TDistribution(null, degreesOfFreedom);
1092        return 2.0 * distribution.cumulativeProbability(-t);
1093
1094    }
1095
1096    /**
1097     * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1098     * of equal subpopulation variances.
1099     * <p>
1100     * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1101     *
1102     * @param m1 first sample mean
1103     * @param m2 second sample mean
1104     * @param v1 first sample variance
1105     * @param v2 second sample variance
1106     * @param n1 first sample n
1107     * @param n2 second sample n
1108     * @return p-value
1109     * @throws MaxCountExceededException if an error occurs computing the p-value
1110     * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1111     * strictly positive
1112     */
1113    protected double homoscedasticTTest(double m1, double m2,
1114                                        double v1, double v2,
1115                                        double n1, double n2)
1116        throws MaxCountExceededException, NotStrictlyPositiveException {
1117
1118        final double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1119        final double degreesOfFreedom = n1 + n2 - 2;
1120        // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1121        final TDistribution distribution = new TDistribution(null, degreesOfFreedom);
1122        return 2.0 * distribution.cumulativeProbability(-t);
1123
1124    }
1125
1126    /**
1127     * Check significance level.
1128     *
1129     * @param alpha significance level
1130     * @throws OutOfRangeException if the significance level is out of bounds.
1131     */
1132    private void checkSignificanceLevel(final double alpha)
1133        throws OutOfRangeException {
1134
1135        if (alpha <= 0 || alpha > 0.5) {
1136            throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
1137                                          alpha, 0.0, 0.5);
1138        }
1139
1140    }
1141
1142    /**
1143     * Check sample data.
1144     *
1145     * @param data Sample data.
1146     * @throws NullArgumentException if {@code data} is {@code null}.
1147     * @throws NumberIsTooSmallException if there is not enough sample data.
1148     */
1149    private void checkSampleData(final double[] data)
1150        throws NullArgumentException, NumberIsTooSmallException {
1151
1152        if (data == null) {
1153            throw new NullArgumentException();
1154        }
1155        if (data.length < 2) {
1156            throw new NumberIsTooSmallException(
1157                    LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1158                    data.length, 2, true);
1159        }
1160
1161    }
1162
1163    /**
1164     * Check sample data.
1165     *
1166     * @param stat Statistical summary.
1167     * @throws NullArgumentException if {@code stat} is {@code null}.
1168     * @throws NumberIsTooSmallException if there is not enough sample data.
1169     */
1170    private void checkSampleData(final StatisticalSummary stat)
1171        throws NullArgumentException, NumberIsTooSmallException {
1172
1173        if (stat == null) {
1174            throw new NullArgumentException();
1175        }
1176        if (stat.getN() < 2) {
1177            throw new NumberIsTooSmallException(
1178                    LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1179                    stat.getN(), 2, true);
1180        }
1181
1182    }
1183
1184}