001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat.inference;
018
019import org.apache.commons.math3.distribution.TDistribution;
020import org.apache.commons.math3.exception.DimensionMismatchException;
021import org.apache.commons.math3.exception.MathIllegalArgumentException;
022import org.apache.commons.math3.exception.MaxCountExceededException;
023import org.apache.commons.math3.exception.NoDataException;
024import org.apache.commons.math3.exception.NotStrictlyPositiveException;
025import org.apache.commons.math3.exception.NullArgumentException;
026import org.apache.commons.math3.exception.NumberIsTooSmallException;
027import org.apache.commons.math3.exception.OutOfRangeException;
028import org.apache.commons.math3.exception.util.LocalizedFormats;
029import org.apache.commons.math3.stat.StatUtils;
030import org.apache.commons.math3.stat.descriptive.StatisticalSummary;
031import org.apache.commons.math3.util.FastMath;
032
033/**
034 * An implementation for Student's t-tests.
035 * <p>
036 * Tests can be:<ul>
037 * <li>One-sample or two-sample</li>
038 * <li>One-sided or two-sided</li>
039 * <li>Paired or unpaired (for two-sample tests)</li>
040 * <li>Homoscedastic (equal variance assumption) or heteroscedastic
041 * (for two sample tests)</li>
042 * <li>Fixed significance level (boolean-valued) or returning p-values.
043 * </li></ul></p>
044 * <p>
 * Test statistics are available for all tests.  Methods including "Test"
 * in their names perform tests; all other methods return t-statistics.  Among
047 * the "Test" methods, <code>double-</code>valued methods return p-values;
048 * <code>boolean-</code>valued methods perform fixed significance level tests.
049 * Significance levels are always specified as numbers between 0 and 0.5
050 * (e.g. tests at the 95% level  use <code>alpha=0.05</code>).</p>
051 * <p>
052 * Input to tests can be either <code>double[]</code> arrays or
053 * {@link StatisticalSummary} instances.</p><p>
054 * Uses commons-math {@link org.apache.commons.math3.distribution.TDistribution}
055 * implementation to estimate exact p-values.</p>
056 *
057 * @version $Id: TTest.java 1416643 2012-12-03 19:37:14Z tn $
058 */
059public class TTest {
060    /**
061     * Computes a paired, 2-sample t-statistic based on the data in the input
062     * arrays.  The t-statistic returned is equivalent to what would be returned by
063     * computing the one-sample t-statistic {@link #t(double, double[])}, with
064     * <code>mu = 0</code> and the sample array consisting of the (signed)
065     * differences between corresponding entries in <code>sample1</code> and
066     * <code>sample2.</code>
067     * <p>
068     * <strong>Preconditions</strong>: <ul>
069     * <li>The input arrays must have the same length and their common length
070     * must be at least 2.
071     * </li></ul></p>
072     *
073     * @param sample1 array of sample data values
074     * @param sample2 array of sample data values
075     * @return t statistic
076     * @throws NullArgumentException if the arrays are <code>null</code>
077     * @throws NoDataException if the arrays are empty
078     * @throws DimensionMismatchException if the length of the arrays is not equal
079     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
080     */
081    public double pairedT(final double[] sample1, final double[] sample2)
082        throws NullArgumentException, NoDataException,
083        DimensionMismatchException, NumberIsTooSmallException {
084
085        checkSampleData(sample1);
086        checkSampleData(sample2);
087        double meanDifference = StatUtils.meanDifference(sample1, sample2);
088        return t(meanDifference, 0,
089                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
090                 sample1.length);
091
092    }
093
094    /**
095     * Returns the <i>observed significance level</i>, or
096     * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
097     * based on the data in the input arrays.
098     * <p>
099     * The number returned is the smallest significance level
100     * at which one can reject the null hypothesis that the mean of the paired
101     * differences is 0 in favor of the two-sided alternative that the mean paired
102     * difference is not equal to 0. For a one-sided test, divide the returned
103     * value by 2.</p>
104     * <p>
105     * This test is equivalent to a one-sample t-test computed using
106     * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
107     * array consisting of the signed differences between corresponding elements of
108     * <code>sample1</code> and <code>sample2.</code></p>
109     * <p>
110     * <strong>Usage Note:</strong><br>
111     * The validity of the p-value depends on the assumptions of the parametric
112     * t-test procedure, as discussed
113     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
114     * here</a></p>
115     * <p>
116     * <strong>Preconditions</strong>: <ul>
117     * <li>The input array lengths must be the same and their common length must
118     * be at least 2.
119     * </li></ul></p>
120     *
121     * @param sample1 array of sample data values
122     * @param sample2 array of sample data values
123     * @return p-value for t-test
124     * @throws NullArgumentException if the arrays are <code>null</code>
125     * @throws NoDataException if the arrays are empty
126     * @throws DimensionMismatchException if the length of the arrays is not equal
127     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
128     * @throws MaxCountExceededException if an error occurs computing the p-value
129     */
130    public double pairedTTest(final double[] sample1, final double[] sample2)
131        throws NullArgumentException, NoDataException, DimensionMismatchException,
132        NumberIsTooSmallException, MaxCountExceededException {
133
134        double meanDifference = StatUtils.meanDifference(sample1, sample2);
135        return tTest(meanDifference, 0,
136                StatUtils.varianceDifference(sample1, sample2, meanDifference),
137                sample1.length);
138
139    }
140
141    /**
142     * Performs a paired t-test evaluating the null hypothesis that the
143     * mean of the paired differences between <code>sample1</code> and
144     * <code>sample2</code> is 0 in favor of the two-sided alternative that the
145     * mean paired difference is not equal to 0, with significance level
146     * <code>alpha</code>.
147     * <p>
148     * Returns <code>true</code> iff the null hypothesis can be rejected with
149     * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
150     * <code>alpha * 2</code></p>
151     * <p>
152     * <strong>Usage Note:</strong><br>
153     * The validity of the test depends on the assumptions of the parametric
154     * t-test procedure, as discussed
155     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
156     * here</a></p>
157     * <p>
158     * <strong>Preconditions</strong>: <ul>
159     * <li>The input array lengths must be the same and their common length
160     * must be at least 2.
161     * </li>
162     * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
163     * </li></ul></p>
164     *
165     * @param sample1 array of sample data values
166     * @param sample2 array of sample data values
167     * @param alpha significance level of the test
168     * @return true if the null hypothesis can be rejected with
169     * confidence 1 - alpha
170     * @throws NullArgumentException if the arrays are <code>null</code>
171     * @throws NoDataException if the arrays are empty
172     * @throws DimensionMismatchException if the length of the arrays is not equal
173     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
174     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
175     * @throws MaxCountExceededException if an error occurs computing the p-value
176     */
177    public boolean pairedTTest(final double[] sample1, final double[] sample2,
178                               final double alpha)
179        throws NullArgumentException, NoDataException, DimensionMismatchException,
180        NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {
181
182        checkSignificanceLevel(alpha);
183        return pairedTTest(sample1, sample2) < alpha;
184
185    }
186
187    /**
188     * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
189     * t statistic </a> given observed values and a comparison constant.
190     * <p>
191     * This statistic can be used to perform a one sample t-test for the mean.
192     * </p><p>
193     * <strong>Preconditions</strong>: <ul>
194     * <li>The observed array length must be at least 2.
195     * </li></ul></p>
196     *
197     * @param mu comparison constant
198     * @param observed array of values
199     * @return t statistic
200     * @throws NullArgumentException if <code>observed</code> is <code>null</code>
201     * @throws NumberIsTooSmallException if the length of <code>observed</code> is &lt; 2
202     */
203    public double t(final double mu, final double[] observed)
204        throws NullArgumentException, NumberIsTooSmallException {
205
206        checkSampleData(observed);
207        // No try-catch or advertised exception because args have just been checked
208        return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
209                observed.length);
210
211    }
212
213    /**
214     * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
215     * t statistic </a> to use in comparing the mean of the dataset described by
216     * <code>sampleStats</code> to <code>mu</code>.
217     * <p>
218     * This statistic can be used to perform a one sample t-test for the mean.
219     * </p><p>
220     * <strong>Preconditions</strong>: <ul>
221     * <li><code>observed.getN() &ge; 2</code>.
222     * </li></ul></p>
223     *
224     * @param mu comparison constant
225     * @param sampleStats DescriptiveStatistics holding sample summary statitstics
226     * @return t statistic
227     * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
228     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
229     */
230    public double t(final double mu, final StatisticalSummary sampleStats)
231        throws NullArgumentException, NumberIsTooSmallException {
232
233        checkSampleData(sampleStats);
234        return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
235                 sampleStats.getN());
236
237    }
238
239    /**
240     * Computes a 2-sample t statistic,  under the hypothesis of equal
241     * subpopulation variances.  To compute a t-statistic without the
242     * equal variances hypothesis, use {@link #t(double[], double[])}.
243     * <p>
244     * This statistic can be used to perform a (homoscedastic) two-sample
245     * t-test to compare sample means.</p>
246     * <p>
247     * The t-statistic is</p>
248     * <p>
249     * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
250     * </p><p>
251     * where <strong><code>n1</code></strong> is the size of first sample;
252     * <strong><code> n2</code></strong> is the size of second sample;
253     * <strong><code> m1</code></strong> is the mean of first sample;
254     * <strong><code> m2</code></strong> is the mean of second sample</li>
255     * </ul>
256     * and <strong><code>var</code></strong> is the pooled variance estimate:
257     * </p><p>
258     * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
259     * </p><p>
260     * with <strong><code>var1</code></strong> the variance of the first sample and
261     * <strong><code>var2</code></strong> the variance of the second sample.
262     * </p><p>
263     * <strong>Preconditions</strong>: <ul>
264     * <li>The observed array lengths must both be at least 2.
265     * </li></ul></p>
266     *
267     * @param sample1 array of sample data values
268     * @param sample2 array of sample data values
269     * @return t statistic
270     * @throws NullArgumentException if the arrays are <code>null</code>
271     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
272     */
273    public double homoscedasticT(final double[] sample1, final double[] sample2)
274        throws NullArgumentException, NumberIsTooSmallException {
275
276        checkSampleData(sample1);
277        checkSampleData(sample2);
278        // No try-catch or advertised exception because args have just been checked
279        return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
280                              StatUtils.variance(sample1), StatUtils.variance(sample2),
281                              sample1.length, sample2.length);
282
283    }
284
285    /**
286     * Computes a 2-sample t statistic, without the hypothesis of equal
287     * subpopulation variances.  To compute a t-statistic assuming equal
288     * variances, use {@link #homoscedasticT(double[], double[])}.
289     * <p>
290     * This statistic can be used to perform a two-sample t-test to compare
291     * sample means.</p>
292     * <p>
293     * The t-statistic is</p>
294     * <p>
295     * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
296     * </p><p>
297     *  where <strong><code>n1</code></strong> is the size of the first sample
298     * <strong><code> n2</code></strong> is the size of the second sample;
299     * <strong><code> m1</code></strong> is the mean of the first sample;
300     * <strong><code> m2</code></strong> is the mean of the second sample;
301     * <strong><code> var1</code></strong> is the variance of the first sample;
302     * <strong><code> var2</code></strong> is the variance of the second sample;
303     * </p><p>
304     * <strong>Preconditions</strong>: <ul>
305     * <li>The observed array lengths must both be at least 2.
306     * </li></ul></p>
307     *
308     * @param sample1 array of sample data values
309     * @param sample2 array of sample data values
310     * @return t statistic
311     * @throws NullArgumentException if the arrays are <code>null</code>
312     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
313     */
314    public double t(final double[] sample1, final double[] sample2)
315        throws NullArgumentException, NumberIsTooSmallException {
316
317        checkSampleData(sample1);
318        checkSampleData(sample2);
319        // No try-catch or advertised exception because args have just been checked
320        return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
321                 StatUtils.variance(sample1), StatUtils.variance(sample2),
322                 sample1.length, sample2.length);
323
324    }
325
326    /**
327     * Computes a 2-sample t statistic </a>, comparing the means of the datasets
328     * described by two {@link StatisticalSummary} instances, without the
329     * assumption of equal subpopulation variances.  Use
330     * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
331     * compute a t-statistic under the equal variances assumption.
332     * <p>
333     * This statistic can be used to perform a two-sample t-test to compare
334     * sample means.</p>
335     * <p>
336      * The returned  t-statistic is</p>
337     * <p>
338     * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
339     * </p><p>
340     * where <strong><code>n1</code></strong> is the size of the first sample;
341     * <strong><code> n2</code></strong> is the size of the second sample;
342     * <strong><code> m1</code></strong> is the mean of the first sample;
343     * <strong><code> m2</code></strong> is the mean of the second sample
344     * <strong><code> var1</code></strong> is the variance of the first sample;
345     * <strong><code> var2</code></strong> is the variance of the second sample
346     * </p><p>
347     * <strong>Preconditions</strong>: <ul>
348     * <li>The datasets described by the two Univariates must each contain
349     * at least 2 observations.
350     * </li></ul></p>
351     *
352     * @param sampleStats1 StatisticalSummary describing data from the first sample
353     * @param sampleStats2 StatisticalSummary describing data from the second sample
354     * @return t statistic
355     * @throws NullArgumentException if the sample statistics are <code>null</code>
356     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
357     */
358    public double t(final StatisticalSummary sampleStats1,
359                    final StatisticalSummary sampleStats2)
360        throws NullArgumentException, NumberIsTooSmallException {
361
362        checkSampleData(sampleStats1);
363        checkSampleData(sampleStats2);
364        return t(sampleStats1.getMean(), sampleStats2.getMean(),
365                 sampleStats1.getVariance(), sampleStats2.getVariance(),
366                 sampleStats1.getN(), sampleStats2.getN());
367
368    }
369
370    /**
371     * Computes a 2-sample t statistic, comparing the means of the datasets
372     * described by two {@link StatisticalSummary} instances, under the
373     * assumption of equal subpopulation variances.  To compute a t-statistic
374     * without the equal variances assumption, use
375     * {@link #t(StatisticalSummary, StatisticalSummary)}.
376     * <p>
377     * This statistic can be used to perform a (homoscedastic) two-sample
378     * t-test to compare sample means.</p>
379     * <p>
380     * The t-statistic returned is</p>
381     * <p>
382     * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
383     * </p><p>
384     * where <strong><code>n1</code></strong> is the size of first sample;
385     * <strong><code> n2</code></strong> is the size of second sample;
386     * <strong><code> m1</code></strong> is the mean of first sample;
387     * <strong><code> m2</code></strong> is the mean of second sample
388     * and <strong><code>var</code></strong> is the pooled variance estimate:
389     * </p><p>
390     * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
391     * </p><p>
392     * with <strong><code>var1</code></strong> the variance of the first sample and
393     * <strong><code>var2</code></strong> the variance of the second sample.
394     * </p><p>
395     * <strong>Preconditions</strong>: <ul>
396     * <li>The datasets described by the two Univariates must each contain
397     * at least 2 observations.
398     * </li></ul></p>
399     *
400     * @param sampleStats1 StatisticalSummary describing data from the first sample
401     * @param sampleStats2 StatisticalSummary describing data from the second sample
402     * @return t statistic
403     * @throws NullArgumentException if the sample statistics are <code>null</code>
404     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
405     */
406    public double homoscedasticT(final StatisticalSummary sampleStats1,
407                                 final StatisticalSummary sampleStats2)
408        throws NullArgumentException, NumberIsTooSmallException {
409
410        checkSampleData(sampleStats1);
411        checkSampleData(sampleStats2);
412        return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
413                              sampleStats1.getVariance(), sampleStats2.getVariance(),
414                              sampleStats1.getN(), sampleStats2.getN());
415
416    }
417
418    /**
419     * Returns the <i>observed significance level</i>, or
420     * <i>p-value</i>, associated with a one-sample, two-tailed t-test
421     * comparing the mean of the input array with the constant <code>mu</code>.
422     * <p>
423     * The number returned is the smallest significance level
424     * at which one can reject the null hypothesis that the mean equals
425     * <code>mu</code> in favor of the two-sided alternative that the mean
426     * is different from <code>mu</code>. For a one-sided test, divide the
427     * returned value by 2.</p>
428     * <p>
429     * <strong>Usage Note:</strong><br>
430     * The validity of the test depends on the assumptions of the parametric
431     * t-test procedure, as discussed
432     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
433     * </p><p>
434     * <strong>Preconditions</strong>: <ul>
435     * <li>The observed array length must be at least 2.
436     * </li></ul></p>
437     *
438     * @param mu constant value to compare sample mean against
439     * @param sample array of sample data values
440     * @return p-value
441     * @throws NullArgumentException if the sample array is <code>null</code>
442     * @throws NumberIsTooSmallException if the length of the array is &lt; 2
443     * @throws MaxCountExceededException if an error occurs computing the p-value
444     */
445    public double tTest(final double mu, final double[] sample)
446        throws NullArgumentException, NumberIsTooSmallException,
447        MaxCountExceededException {
448
449        checkSampleData(sample);
450        // No try-catch or advertised exception because args have just been checked
451        return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
452                     sample.length);
453
454    }
455
456    /**
457     * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
458     * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
459     * which <code>sample</code> is drawn equals <code>mu</code>.
460     * <p>
461     * Returns <code>true</code> iff the null hypothesis can be
462     * rejected with confidence <code>1 - alpha</code>.  To
463     * perform a 1-sided test, use <code>alpha * 2</code></p>
464     * <p>
465     * <strong>Examples:</strong><br><ol>
466     * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
467     * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
468     * </li>
469     * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
470     * at the 99% level, first verify that the measured sample mean is less
471     * than <code>mu</code> and then use
472     * <br><code>tTest(mu, sample, 0.02) </code>
473     * </li></ol></p>
474     * <p>
475     * <strong>Usage Note:</strong><br>
476     * The validity of the test depends on the assumptions of the one-sample
477     * parametric t-test procedure, as discussed
478     * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
479     * </p><p>
480     * <strong>Preconditions</strong>: <ul>
481     * <li>The observed array length must be at least 2.
482     * </li></ul></p>
483     *
484     * @param mu constant value to compare sample mean against
485     * @param sample array of sample data values
486     * @param alpha significance level of the test
487     * @return p-value
488     * @throws NullArgumentException if the sample array is <code>null</code>
489     * @throws NumberIsTooSmallException if the length of the array is &lt; 2
490     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
491     * @throws MaxCountExceededException if an error computing the p-value
492     */
493    public boolean tTest(final double mu, final double[] sample, final double alpha)
494        throws NullArgumentException, NumberIsTooSmallException,
495        OutOfRangeException, MaxCountExceededException {
496
497        checkSignificanceLevel(alpha);
498        return tTest(mu, sample) < alpha;
499
500    }
501
502    /**
503     * Returns the <i>observed significance level</i>, or
504     * <i>p-value</i>, associated with a one-sample, two-tailed t-test
505     * comparing the mean of the dataset described by <code>sampleStats</code>
506     * with the constant <code>mu</code>.
507     * <p>
508     * The number returned is the smallest significance level
509     * at which one can reject the null hypothesis that the mean equals
510     * <code>mu</code> in favor of the two-sided alternative that the mean
511     * is different from <code>mu</code>. For a one-sided test, divide the
512     * returned value by 2.</p>
513     * <p>
514     * <strong>Usage Note:</strong><br>
515     * The validity of the test depends on the assumptions of the parametric
516     * t-test procedure, as discussed
517     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
518     * here</a></p>
519     * <p>
520     * <strong>Preconditions</strong>: <ul>
521     * <li>The sample must contain at least 2 observations.
522     * </li></ul></p>
523     *
524     * @param mu constant value to compare sample mean against
525     * @param sampleStats StatisticalSummary describing sample data
526     * @return p-value
527     * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
528     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
529     * @throws MaxCountExceededException if an error occurs computing the p-value
530     */
531    public double tTest(final double mu, final StatisticalSummary sampleStats)
532        throws NullArgumentException, NumberIsTooSmallException,
533        MaxCountExceededException {
534
535        checkSampleData(sampleStats);
536        return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
537                     sampleStats.getN());
538
539    }
540
541    /**
542     * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
543     * two-sided t-test</a> evaluating the null hypothesis that the mean of the
544     * population from which the dataset described by <code>stats</code> is
545     * drawn equals <code>mu</code>.
546     * <p>
547     * Returns <code>true</code> iff the null hypothesis can be rejected with
548     * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
549     * <code>alpha * 2.</code></p>
550     * <p>
551     * <strong>Examples:</strong><br><ol>
552     * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
553     * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
554     * </li>
555     * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
556     * at the 99% level, first verify that the measured sample mean is less
557     * than <code>mu</code> and then use
558     * <br><code>tTest(mu, sampleStats, 0.02) </code>
559     * </li></ol></p>
560     * <p>
561     * <strong>Usage Note:</strong><br>
562     * The validity of the test depends on the assumptions of the one-sample
563     * parametric t-test procedure, as discussed
564     * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
565     * </p><p>
566     * <strong>Preconditions</strong>: <ul>
567     * <li>The sample must include at least 2 observations.
568     * </li></ul></p>
569     *
570     * @param mu constant value to compare sample mean against
571     * @param sampleStats StatisticalSummary describing sample data values
572     * @param alpha significance level of the test
573     * @return p-value
574     * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
575     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
576     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
577     * @throws MaxCountExceededException if an error occurs computing the p-value
578     */
579    public boolean tTest(final double mu, final StatisticalSummary sampleStats,
580                         final double alpha)
581    throws NullArgumentException, NumberIsTooSmallException,
582    OutOfRangeException, MaxCountExceededException {
583
584        checkSignificanceLevel(alpha);
585        return tTest(mu, sampleStats) < alpha;
586
587    }
588
589    /**
590     * Returns the <i>observed significance level</i>, or
591     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
592     * comparing the means of the input arrays.
593     * <p>
594     * The number returned is the smallest significance level
595     * at which one can reject the null hypothesis that the two means are
596     * equal in favor of the two-sided alternative that they are different.
597     * For a one-sided test, divide the returned value by 2.</p>
598     * <p>
599     * The test does not assume that the underlying popuation variances are
600     * equal  and it uses approximated degrees of freedom computed from the
601     * sample data to compute the p-value.  The t-statistic used is as defined in
602     * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
603     * to the degrees of freedom is used,
604     * as described
605     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
606     * here.</a>  To perform the test under the assumption of equal subpopulation
607     * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
608     * <p>
609     * <strong>Usage Note:</strong><br>
610     * The validity of the p-value depends on the assumptions of the parametric
611     * t-test procedure, as discussed
612     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
613     * here</a></p>
614     * <p>
615     * <strong>Preconditions</strong>: <ul>
616     * <li>The observed array lengths must both be at least 2.
617     * </li></ul></p>
618     *
619     * @param sample1 array of sample data values
620     * @param sample2 array of sample data values
621     * @return p-value for t-test
622     * @throws NullArgumentException if the arrays are <code>null</code>
623     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
624     * @throws MaxCountExceededException if an error occurs computing the p-value
625     */
626    public double tTest(final double[] sample1, final double[] sample2)
627        throws NullArgumentException, NumberIsTooSmallException,
628        MaxCountExceededException {
629
630        checkSampleData(sample1);
631        checkSampleData(sample2);
632        // No try-catch or advertised exception because args have just been checked
633        return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
634                     StatUtils.variance(sample1), StatUtils.variance(sample2),
635                     sample1.length, sample2.length);
636
637    }
638
639    /**
640     * Returns the <i>observed significance level</i>, or
641     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
642     * comparing the means of the input arrays, under the assumption that
643     * the two samples are drawn from subpopulations with equal variances.
644     * To perform the test without the equal variances assumption, use
645     * {@link #tTest(double[], double[])}.</p>
646     * <p>
647     * The number returned is the smallest significance level
648     * at which one can reject the null hypothesis that the two means are
649     * equal in favor of the two-sided alternative that they are different.
650     * For a one-sided test, divide the returned value by 2.</p>
651     * <p>
652     * A pooled variance estimate is used to compute the t-statistic.  See
653     * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
654     * minus 2 is used as the degrees of freedom.</p>
655     * <p>
656     * <strong>Usage Note:</strong><br>
657     * The validity of the p-value depends on the assumptions of the parametric
658     * t-test procedure, as discussed
659     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
660     * here</a></p>
661     * <p>
662     * <strong>Preconditions</strong>: <ul>
663     * <li>The observed array lengths must both be at least 2.
664     * </li></ul></p>
665     *
666     * @param sample1 array of sample data values
667     * @param sample2 array of sample data values
668     * @return p-value for t-test
669     * @throws NullArgumentException if the arrays are <code>null</code>
670     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
671     * @throws MaxCountExceededException if an error occurs computing the p-value
672     */
673    public double homoscedasticTTest(final double[] sample1, final double[] sample2)
674        throws NullArgumentException, NumberIsTooSmallException,
675        MaxCountExceededException {
676
677        checkSampleData(sample1);
678        checkSampleData(sample2);
679        // No try-catch or advertised exception because args have just been checked
680        return homoscedasticTTest(StatUtils.mean(sample1),
681                                  StatUtils.mean(sample2),
682                                  StatUtils.variance(sample1),
683                                  StatUtils.variance(sample2),
684                                  sample1.length, sample2.length);
685
686    }
687
688    /**
689     * Performs a
690     * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
691     * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
692     * and <code>sample2</code> are drawn from populations with the same mean,
693     * with significance level <code>alpha</code>.  This test does not assume
694     * that the subpopulation variances are equal.  To perform the test assuming
695     * equal variances, use
696     * {@link #homoscedasticTTest(double[], double[], double)}.
697     * <p>
698     * Returns <code>true</code> iff the null hypothesis that the means are
699     * equal can be rejected with confidence <code>1 - alpha</code>.  To
700     * perform a 1-sided test, use <code>alpha * 2</code></p>
701     * <p>
702     * See {@link #t(double[], double[])} for the formula used to compute the
703     * t-statistic.  Degrees of freedom are approximated using the
704     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
705     * Welch-Satterthwaite approximation.</a></p>
706     * <p>
707     * <strong>Examples:</strong><br><ol>
708     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
709     * the 95% level,  use
710     * <br><code>tTest(sample1, sample2, 0.05). </code>
711     * </li>
712     * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>,
713     * at the 99% level, first verify that the measured  mean of <code>sample 1</code>
714     * is less than the mean of <code>sample 2</code> and then use
715     * <br><code>tTest(sample1, sample2, 0.02) </code>
716     * </li></ol></p>
717     * <p>
718     * <strong>Usage Note:</strong><br>
719     * The validity of the test depends on the assumptions of the parametric
720     * t-test procedure, as discussed
721     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
722     * here</a></p>
723     * <p>
724     * <strong>Preconditions</strong>: <ul>
725     * <li>The observed array lengths must both be at least 2.
726     * </li>
727     * <li> <code> 0 < alpha < 0.5 </code>
728     * </li></ul></p>
729     *
730     * @param sample1 array of sample data values
731     * @param sample2 array of sample data values
732     * @param alpha significance level of the test
733     * @return true if the null hypothesis can be rejected with
734     * confidence 1 - alpha
735     * @throws NullArgumentException if the arrays are <code>null</code>
736     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
737     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
738     * @throws MaxCountExceededException if an error occurs computing the p-value
739     */
740    public boolean tTest(final double[] sample1, final double[] sample2,
741                         final double alpha)
742        throws NullArgumentException, NumberIsTooSmallException,
743        OutOfRangeException, MaxCountExceededException {
744
745        checkSignificanceLevel(alpha);
746        return tTest(sample1, sample2) < alpha;
747
748    }
749
750    /**
751     * Performs a
752     * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
753     * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
754     * and <code>sample2</code> are drawn from populations with the same mean,
755     * with significance level <code>alpha</code>,  assuming that the
756     * subpopulation variances are equal.  Use
757     * {@link #tTest(double[], double[], double)} to perform the test without
758     * the assumption of equal variances.
759     * <p>
760     * Returns <code>true</code> iff the null hypothesis that the means are
761     * equal can be rejected with confidence <code>1 - alpha</code>.  To
762     * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
763     * without the assumption of equal subpopulation variances, use
764     * {@link #tTest(double[], double[], double)}.</p>
765     * <p>
766     * A pooled variance estimate is used to compute the t-statistic. See
767     * {@link #t(double[], double[])} for the formula. The sum of the sample
768     * sizes minus 2 is used as the degrees of freedom.</p>
769     * <p>
770     * <strong>Examples:</strong><br><ol>
771     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
772     * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
773     * </li>
774     * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
775     * at the 99% level, first verify that the measured mean of
776     * <code>sample 1</code> is less than the mean of <code>sample 2</code>
777     * and then use
778     * <br><code>tTest(sample1, sample2, 0.02) </code>
779     * </li></ol></p>
780     * <p>
781     * <strong>Usage Note:</strong><br>
782     * The validity of the test depends on the assumptions of the parametric
783     * t-test procedure, as discussed
784     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
785     * here</a></p>
786     * <p>
787     * <strong>Preconditions</strong>: <ul>
788     * <li>The observed array lengths must both be at least 2.
789     * </li>
790     * <li> <code> 0 < alpha < 0.5 </code>
791     * </li></ul></p>
792     *
793     * @param sample1 array of sample data values
794     * @param sample2 array of sample data values
795     * @param alpha significance level of the test
796     * @return true if the null hypothesis can be rejected with
797     * confidence 1 - alpha
798     * @throws NullArgumentException if the arrays are <code>null</code>
799     * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
800     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
801     * @throws MaxCountExceededException if an error occurs computing the p-value
802     */
803    public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
804                                      final double alpha)
805        throws NullArgumentException, NumberIsTooSmallException,
806        OutOfRangeException, MaxCountExceededException {
807
808        checkSignificanceLevel(alpha);
809        return homoscedasticTTest(sample1, sample2) < alpha;
810
811    }
812
813    /**
814     * Returns the <i>observed significance level</i>, or
815     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
816     * comparing the means of the datasets described by two StatisticalSummary
817     * instances.
818     * <p>
819     * The number returned is the smallest significance level
820     * at which one can reject the null hypothesis that the two means are
821     * equal in favor of the two-sided alternative that they are different.
822     * For a one-sided test, divide the returned value by 2.</p>
823     * <p>
824     * The test does not assume that the underlying population variances are
825     * equal  and it uses approximated degrees of freedom computed from the
826     * sample data to compute the p-value.   To perform the test assuming
827     * equal variances, use
828     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
829     * <p>
830     * <strong>Usage Note:</strong><br>
831     * The validity of the p-value depends on the assumptions of the parametric
832     * t-test procedure, as discussed
833     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
834     * here</a></p>
835     * <p>
836     * <strong>Preconditions</strong>: <ul>
837     * <li>The datasets described by the two Univariates must each contain
838     * at least 2 observations.
839     * </li></ul></p>
840     *
841     * @param sampleStats1  StatisticalSummary describing data from the first sample
842     * @param sampleStats2  StatisticalSummary describing data from the second sample
843     * @return p-value for t-test
844     * @throws NullArgumentException if the sample statistics are <code>null</code>
845     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
846     * @throws MaxCountExceededException if an error occurs computing the p-value
847     */
848    public double tTest(final StatisticalSummary sampleStats1,
849                        final StatisticalSummary sampleStats2)
850        throws NullArgumentException, NumberIsTooSmallException,
851        MaxCountExceededException {
852
853        checkSampleData(sampleStats1);
854        checkSampleData(sampleStats2);
855        return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
856                     sampleStats1.getVariance(), sampleStats2.getVariance(),
857                     sampleStats1.getN(), sampleStats2.getN());
858
859    }
860
861    /**
862     * Returns the <i>observed significance level</i>, or
863     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
864     * comparing the means of the datasets described by two StatisticalSummary
865     * instances, under the hypothesis of equal subpopulation variances. To
866     * perform a test without the equal variances assumption, use
867     * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
868     * <p>
869     * The number returned is the smallest significance level
870     * at which one can reject the null hypothesis that the two means are
871     * equal in favor of the two-sided alternative that they are different.
872     * For a one-sided test, divide the returned value by 2.</p>
873     * <p>
874     * See {@link #homoscedasticT(double[], double[])} for the formula used to
875     * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
876     * the degrees of freedom.</p>
877     * <p>
878     * <strong>Usage Note:</strong><br>
879     * The validity of the p-value depends on the assumptions of the parametric
880     * t-test procedure, as discussed
881     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
882     * </p><p>
883     * <strong>Preconditions</strong>: <ul>
884     * <li>The datasets described by the two Univariates must each contain
885     * at least 2 observations.
886     * </li></ul></p>
887     *
888     * @param sampleStats1  StatisticalSummary describing data from the first sample
889     * @param sampleStats2  StatisticalSummary describing data from the second sample
890     * @return p-value for t-test
891     * @throws NullArgumentException if the sample statistics are <code>null</code>
892     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
893     * @throws MaxCountExceededException if an error occurs computing the p-value
894     */
895    public double homoscedasticTTest(final StatisticalSummary sampleStats1,
896                                     final StatisticalSummary sampleStats2)
897        throws NullArgumentException, NumberIsTooSmallException,
898        MaxCountExceededException {
899
900        checkSampleData(sampleStats1);
901        checkSampleData(sampleStats2);
902        return homoscedasticTTest(sampleStats1.getMean(),
903                                  sampleStats2.getMean(),
904                                  sampleStats1.getVariance(),
905                                  sampleStats2.getVariance(),
906                                  sampleStats1.getN(), sampleStats2.getN());
907
908    }
909
910    /**
911     * Performs a
912     * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
913     * two-sided t-test</a> evaluating the null hypothesis that
914     * <code>sampleStats1</code> and <code>sampleStats2</code> describe
915     * datasets drawn from populations with the same mean, with significance
916     * level <code>alpha</code>.   This test does not assume that the
917     * subpopulation variances are equal.  To perform the test under the equal
918     * variances assumption, use
919     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
920     * <p>
921     * Returns <code>true</code> iff the null hypothesis that the means are
922     * equal can be rejected with confidence <code>1 - alpha</code>.  To
923     * perform a 1-sided test, use <code>alpha * 2</code></p>
924     * <p>
925     * See {@link #t(double[], double[])} for the formula used to compute the
926     * t-statistic.  Degrees of freedom are approximated using the
927     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
928     * Welch-Satterthwaite approximation.</a></p>
929     * <p>
930     * <strong>Examples:</strong><br><ol>
931     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
932     * the 95%, use
933     * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
934     * </li>
935     * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
936     * at the 99% level,  first verify that the measured mean of
937     * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
938     * and then use
939     * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
940     * </li></ol></p>
941     * <p>
942     * <strong>Usage Note:</strong><br>
943     * The validity of the test depends on the assumptions of the parametric
944     * t-test procedure, as discussed
945     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
946     * here</a></p>
947     * <p>
948     * <strong>Preconditions</strong>: <ul>
949     * <li>The datasets described by the two Univariates must each contain
950     * at least 2 observations.
951     * </li>
952     * <li> <code> 0 < alpha < 0.5 </code>
953     * </li></ul></p>
954     *
955     * @param sampleStats1 StatisticalSummary describing sample data values
956     * @param sampleStats2 StatisticalSummary describing sample data values
957     * @param alpha significance level of the test
958     * @return true if the null hypothesis can be rejected with
959     * confidence 1 - alpha
960     * @throws NullArgumentException if the sample statistics are <code>null</code>
961     * @throws NumberIsTooSmallException if the number of samples is &lt; 2
962     * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
963     * @throws MaxCountExceededException if an error occurs computing the p-value
964     */
965    public boolean tTest(final StatisticalSummary sampleStats1,
966                         final StatisticalSummary sampleStats2,
967                         final double alpha)
968        throws NullArgumentException, NumberIsTooSmallException,
969        OutOfRangeException, MaxCountExceededException {
970
971        checkSignificanceLevel(alpha);
972        return tTest(sampleStats1, sampleStats2) < alpha;
973
974    }
975
976    //----------------------------------------------- Protected methods
977
978    /**
979     * Computes approximate degrees of freedom for 2-sample t-test.
980     *
981     * @param v1 first sample variance
982     * @param v2 second sample variance
983     * @param n1 first sample n
984     * @param n2 second sample n
985     * @return approximate degrees of freedom
986     */
987    protected double df(double v1, double v2, double n1, double n2) {
988        return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
989        ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
990                (n2 * n2 * (n2 - 1d)));
991    }
992
993    /**
994     * Computes t test statistic for 1-sample t-test.
995     *
996     * @param m sample mean
997     * @param mu constant to test against
998     * @param v sample variance
999     * @param n sample n
1000     * @return t test statistic
1001     */
1002    protected double t(final double m, final double mu,
1003                       final double v, final double n) {
1004        return (m - mu) / FastMath.sqrt(v / n);
1005    }
1006
1007    /**
1008     * Computes t test statistic for 2-sample t-test.
1009     * <p>
1010     * Does not assume that subpopulation variances are equal.</p>
1011     *
1012     * @param m1 first sample mean
1013     * @param m2 second sample mean
1014     * @param v1 first sample variance
1015     * @param v2 second sample variance
1016     * @param n1 first sample n
1017     * @param n2 second sample n
1018     * @return t test statistic
1019     */
1020    protected double t(final double m1, final double m2,
1021                       final double v1, final double v2,
1022                       final double n1, final double n2)  {
1023        return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
1024    }
1025
1026    /**
1027     * Computes t test statistic for 2-sample t-test under the hypothesis
1028     * of equal subpopulation variances.
1029     *
1030     * @param m1 first sample mean
1031     * @param m2 second sample mean
1032     * @param v1 first sample variance
1033     * @param v2 second sample variance
1034     * @param n1 first sample n
1035     * @param n2 second sample n
1036     * @return t test statistic
1037     */
1038    protected double homoscedasticT(final double m1, final double m2,
1039                                    final double v1, final double v2,
1040                                    final double n1, final double n2)  {
1041        final double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1042        return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1043    }
1044
1045    /**
1046     * Computes p-value for 2-sided, 1-sample t-test.
1047     *
1048     * @param m sample mean
1049     * @param mu constant to test against
1050     * @param v sample variance
1051     * @param n sample n
1052     * @return p-value
1053     * @throws MaxCountExceededException if an error occurs computing the p-value
1054     * @throws MathIllegalArgumentException if n is not greater than 1
1055     */
1056    protected double tTest(final double m, final double mu,
1057                           final double v, final double n)
1058        throws MaxCountExceededException, MathIllegalArgumentException {
1059
1060        double t = FastMath.abs(t(m, mu, v, n));
1061        TDistribution distribution = new TDistribution(n - 1);
1062        return 2.0 * distribution.cumulativeProbability(-t);
1063
1064    }
1065
1066    /**
1067     * Computes p-value for 2-sided, 2-sample t-test.
1068     * <p>
1069     * Does not assume subpopulation variances are equal. Degrees of freedom
1070     * are estimated from the data.</p>
1071     *
1072     * @param m1 first sample mean
1073     * @param m2 second sample mean
1074     * @param v1 first sample variance
1075     * @param v2 second sample variance
1076     * @param n1 first sample n
1077     * @param n2 second sample n
1078     * @return p-value
1079     * @throws MaxCountExceededException if an error occurs computing the p-value
1080     * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1081     * strictly positive
1082     */
1083    protected double tTest(final double m1, final double m2,
1084                           final double v1, final double v2,
1085                           final double n1, final double n2)
1086        throws MaxCountExceededException, NotStrictlyPositiveException {
1087
1088        final double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
1089        final double degreesOfFreedom = df(v1, v2, n1, n2);
1090        TDistribution distribution = new TDistribution(degreesOfFreedom);
1091        return 2.0 * distribution.cumulativeProbability(-t);
1092
1093    }
1094
1095    /**
1096     * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1097     * of equal subpopulation variances.
1098     * <p>
1099     * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1100     *
1101     * @param m1 first sample mean
1102     * @param m2 second sample mean
1103     * @param v1 first sample variance
1104     * @param v2 second sample variance
1105     * @param n1 first sample n
1106     * @param n2 second sample n
1107     * @return p-value
1108     * @throws MaxCountExceededException if an error occurs computing the p-value
1109     * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1110     * strictly positive
1111     */
1112    protected double homoscedasticTTest(double m1, double m2,
1113                                        double v1, double v2,
1114                                        double n1, double n2)
1115        throws MaxCountExceededException, NotStrictlyPositiveException {
1116
1117        final double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1118        final double degreesOfFreedom = n1 + n2 - 2;
1119        TDistribution distribution = new TDistribution(degreesOfFreedom);
1120        return 2.0 * distribution.cumulativeProbability(-t);
1121
1122    }
1123
1124    /**
1125     * Check significance level.
1126     *
1127     * @param alpha significance level
1128     * @throws OutOfRangeException if the significance level is out of bounds.
1129     */
1130    private void checkSignificanceLevel(final double alpha)
1131        throws OutOfRangeException {
1132
1133        if (alpha <= 0 || alpha > 0.5) {
1134            throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
1135                                          alpha, 0.0, 0.5);
1136        }
1137
1138    }
1139
1140    /**
1141     * Check sample data.
1142     *
1143     * @param data Sample data.
1144     * @throws NullArgumentException if {@code data} is {@code null}.
1145     * @throws NumberIsTooSmallException if there is not enough sample data.
1146     */
1147    private void checkSampleData(final double[] data)
1148        throws NullArgumentException, NumberIsTooSmallException {
1149
1150        if (data == null) {
1151            throw new NullArgumentException();
1152        }
1153        if (data.length < 2) {
1154            throw new NumberIsTooSmallException(
1155                    LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1156                    data.length, 2, true);
1157        }
1158
1159    }
1160
1161    /**
1162     * Check sample data.
1163     *
1164     * @param stat Statistical summary.
1165     * @throws NullArgumentException if {@code data} is {@code null}.
1166     * @throws NumberIsTooSmallException if there is not enough sample data.
1167     */
1168    private void checkSampleData(final StatisticalSummary stat)
1169        throws NullArgumentException, NumberIsTooSmallException {
1170
1171        if (stat == null) {
1172            throw new NullArgumentException();
1173        }
1174        if (stat.getN() < 2) {
1175            throw new NumberIsTooSmallException(
1176                    LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1177                    stat.getN(), 2, true);
1178        }
1179
1180    }
1181
1182}