View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.inference;
18  
19  import org.apache.commons.statistics.distribution.TDistribution;
20  import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
21  import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
22  import org.apache.commons.math4.legacy.exception.MaxCountExceededException;
23  import org.apache.commons.math4.legacy.exception.NoDataException;
24  import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
25  import org.apache.commons.math4.legacy.exception.NullArgumentException;
26  import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
27  import org.apache.commons.math4.legacy.exception.OutOfRangeException;
28  import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
29  import org.apache.commons.math4.legacy.stat.StatUtils;
30  import org.apache.commons.math4.legacy.stat.descriptive.StatisticalSummary;
31  import org.apache.commons.math4.core.jdkmath.JdkMath;
32  
33  /**
34   * An implementation for Student's t-tests.
35   * <p>
36   * Tests can be:<ul>
37   * <li>One-sample or two-sample</li>
38   * <li>One-sided or two-sided</li>
39   * <li>Paired or unpaired (for two-sample tests)</li>
40   * <li>Homoscedastic (equal variance assumption) or heteroscedastic
41   * (for two sample tests)</li>
42   * <li>Fixed significance level (boolean-valued) or returning p-values.
43   * </li></ul>
44   * <p>
45   * Test statistics are available for all tests.  Methods including "Test" in
46   * their names perform tests, all other methods return t-statistics.  Among
47   * the "Test" methods, <code>double-</code>valued methods return p-values;
48   * <code>boolean-</code>valued methods perform fixed significance level tests.
49   * Significance levels are always specified as numbers between 0 and 0.5
50   * (e.g. tests at the 95% level  use <code>alpha=0.05</code>).</p>
51   * <p>
52   * Input to tests can be either <code>double[]</code> arrays or
53   * {@link StatisticalSummary} instances.</p><p>
54   * Uses commons-math {@link org.apache.commons.statistics.distribution.TDistribution}
55   * implementation to estimate exact p-values.</p>
56   *
57   */
58  public class TTest {
59      /**
60       * Computes a paired, 2-sample t-statistic based on the data in the input
61       * arrays.  The t-statistic returned is equivalent to what would be returned by
62       * computing the one-sample t-statistic {@link #t(double, double[])}, with
63       * <code>mu = 0</code> and the sample array consisting of the (signed)
64       * differences between corresponding entries in <code>sample1</code> and
65       * <code>sample2.</code>
66       * <p>
67       * <strong>Preconditions</strong>: <ul>
68       * <li>The input arrays must have the same length and their common length
69       * must be at least 2.
70       * </li></ul>
71       *
72       * @param sample1 array of sample data values
73       * @param sample2 array of sample data values
74       * @return t statistic
75       * @throws NullArgumentException if the arrays are <code>null</code>
76       * @throws NoDataException if the arrays are empty
77       * @throws DimensionMismatchException if the length of the arrays is not equal
78       * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
79       */
80      public double pairedT(final double[] sample1, final double[] sample2)
81          throws NullArgumentException, NoDataException,
82          DimensionMismatchException, NumberIsTooSmallException {
83  
84          checkSampleData(sample1);
85          checkSampleData(sample2);
86          double meanDifference = StatUtils.meanDifference(sample1, sample2);
87          return t(meanDifference, 0,
88                   StatUtils.varianceDifference(sample1, sample2, meanDifference),
89                   sample1.length);
90      }
91  
92      /**
93       * Returns the <i>observed significance level</i>, or
94       * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
95       * based on the data in the input arrays.
96       * <p>
97       * The number returned is the smallest significance level
98       * at which one can reject the null hypothesis that the mean of the paired
99       * differences is 0 in favor of the two-sided alternative that the mean paired
100      * difference is not equal to 0. For a one-sided test, divide the returned
101      * value by 2.</p>
102      * <p>
103      * This test is equivalent to a one-sample t-test computed using
104      * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
105      * array consisting of the signed differences between corresponding elements of
106      * <code>sample1</code> and <code>sample2.</code></p>
107      * <p>
108      * <strong>Usage Note:</strong><br>
109      * The validity of the p-value depends on the assumptions of the parametric
110      * t-test procedure, as discussed
111      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
112      * here</a></p>
113      * <p>
114      * <strong>Preconditions</strong>: <ul>
115      * <li>The input array lengths must be the same and their common length must
116      * be at least 2.
117      * </li></ul>
118      *
119      * @param sample1 array of sample data values
120      * @param sample2 array of sample data values
121      * @return p-value for t-test
122      * @throws NullArgumentException if the arrays are <code>null</code>
123      * @throws NoDataException if the arrays are empty
124      * @throws DimensionMismatchException if the length of the arrays is not equal
125      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
126      * @throws MaxCountExceededException if an error occurs computing the p-value
127      */
128     public double pairedTTest(final double[] sample1, final double[] sample2)
129         throws NullArgumentException, NoDataException, DimensionMismatchException,
130         NumberIsTooSmallException, MaxCountExceededException {
131 
132         double meanDifference = StatUtils.meanDifference(sample1, sample2);
133         return tTest(meanDifference, 0,
134                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
135                 sample1.length);
136     }
137 
138     /**
139      * Performs a paired t-test evaluating the null hypothesis that the
140      * mean of the paired differences between <code>sample1</code> and
141      * <code>sample2</code> is 0 in favor of the two-sided alternative that the
142      * mean paired difference is not equal to 0, with significance level
143      * <code>alpha</code>.
144      * <p>
145      * Returns <code>true</code> iff the null hypothesis can be rejected with
146      * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
147      * <code>alpha * 2</code></p>
148      * <p>
149      * <strong>Usage Note:</strong><br>
150      * The validity of the test depends on the assumptions of the parametric
151      * t-test procedure, as discussed
152      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
153      * here</a></p>
154      * <p>
155      * <strong>Preconditions</strong>: <ul>
156      * <li>The input array lengths must be the same and their common length
157      * must be at least 2.
158      * </li>
159      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
160      * </li></ul>
161      *
162      * @param sample1 array of sample data values
163      * @param sample2 array of sample data values
164      * @param alpha significance level of the test
165      * @return true if the null hypothesis can be rejected with
166      * confidence 1 - alpha
167      * @throws NullArgumentException if the arrays are <code>null</code>
168      * @throws NoDataException if the arrays are empty
169      * @throws DimensionMismatchException if the length of the arrays is not equal
170      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
171      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
172      * @throws MaxCountExceededException if an error occurs computing the p-value
173      */
174     public boolean pairedTTest(final double[] sample1, final double[] sample2,
175                                final double alpha)
176         throws NullArgumentException, NoDataException, DimensionMismatchException,
177         NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {
178 
179         checkSignificanceLevel(alpha);
180         return pairedTTest(sample1, sample2) < alpha;
181     }
182 
183     /**
184      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
185      * t statistic </a> given observed values and a comparison constant.
186      * <p>
187      * This statistic can be used to perform a one sample t-test for the mean.
188      * </p><p>
189      * <strong>Preconditions</strong>: <ul>
190      * <li>The observed array length must be at least 2.
191      * </li></ul>
192      *
193      * @param mu comparison constant
194      * @param observed array of values
195      * @return t statistic
196      * @throws NullArgumentException if <code>observed</code> is <code>null</code>
197      * @throws NumberIsTooSmallException if the length of <code>observed</code> is &lt; 2
198      */
199     public double t(final double mu, final double[] observed)
200         throws NullArgumentException, NumberIsTooSmallException {
201 
202         checkSampleData(observed);
203         // No try-catch or advertised exception because args have just been checked
204         return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
205                 observed.length);
206     }
207 
208     /**
209      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
210      * t statistic </a> to use in comparing the mean of the dataset described by
211      * <code>sampleStats</code> to <code>mu</code>.
212      * <p>
213      * This statistic can be used to perform a one sample t-test for the mean.
214      * </p><p>
215      * <strong>Preconditions</strong>: <ul>
216      * <li><code>observed.getN() &ge; 2</code>.
217      * </li></ul>
218      *
219      * @param mu comparison constant
220      * @param sampleStats DescriptiveStatistics holding sample summary statitstics
221      * @return t statistic
222      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
223      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
224      */
225     public double t(final double mu, final StatisticalSummary sampleStats)
226         throws NullArgumentException, NumberIsTooSmallException {
227 
228         checkSampleData(sampleStats);
229         return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
230                  sampleStats.getN());
231     }
232 
233     /**
234      * Computes a 2-sample t statistic,  under the hypothesis of equal
235      * subpopulation variances.  To compute a t-statistic without the
236      * equal variances hypothesis, use {@link #t(double[], double[])}.
237      * <p>
238      * This statistic can be used to perform a (homoscedastic) two-sample
239      * t-test to compare sample means.</p>
240      * <p>
241      * The t-statistic is</p>
242      * <p>
243      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
244      * </p><p>
245      * where <strong><code>n1</code></strong> is the size of first sample;
246      * <strong><code> n2</code></strong> is the size of second sample;
247      * <strong><code> m1</code></strong> is the mean of first sample;
248      * <strong><code> m2</code></strong> is the mean of second sample
249      * and <strong><code>var</code></strong> is the pooled variance estimate:
250      * </p><p>
251      * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
252      * </p><p>
253      * with <strong><code>var1</code></strong> the variance of the first sample and
254      * <strong><code>var2</code></strong> the variance of the second sample.
255      * </p><p>
256      * <strong>Preconditions</strong>: <ul>
257      * <li>The observed array lengths must both be at least 2.
258      * </li></ul>
259      *
260      * @param sample1 array of sample data values
261      * @param sample2 array of sample data values
262      * @return t statistic
263      * @throws NullArgumentException if the arrays are <code>null</code>
264      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
265      */
266     public double homoscedasticT(final double[] sample1, final double[] sample2)
267         throws NullArgumentException, NumberIsTooSmallException {
268 
269         checkSampleData(sample1);
270         checkSampleData(sample2);
271         // No try-catch or advertised exception because args have just been checked
272         return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
273                               StatUtils.variance(sample1), StatUtils.variance(sample2),
274                               sample1.length, sample2.length);
275     }
276 
277     /**
278      * Computes a 2-sample t statistic, without the hypothesis of equal
279      * subpopulation variances.  To compute a t-statistic assuming equal
280      * variances, use {@link #homoscedasticT(double[], double[])}.
281      * <p>
282      * This statistic can be used to perform a two-sample t-test to compare
283      * sample means.</p>
284      * <p>
285      * The t-statistic is</p>
286      * <p>
287      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
288      * </p><p>
289      *  where <strong><code>n1</code></strong> is the size of the first sample
290      * <strong><code> n2</code></strong> is the size of the second sample;
291      * <strong><code> m1</code></strong> is the mean of the first sample;
292      * <strong><code> m2</code></strong> is the mean of the second sample;
293      * <strong><code> var1</code></strong> is the variance of the first sample;
294      * <strong><code> var2</code></strong> is the variance of the second sample;
295      * </p><p>
296      * <strong>Preconditions</strong>: <ul>
297      * <li>The observed array lengths must both be at least 2.
298      * </li></ul>
299      *
300      * @param sample1 array of sample data values
301      * @param sample2 array of sample data values
302      * @return t statistic
303      * @throws NullArgumentException if the arrays are <code>null</code>
304      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
305      */
306     public double t(final double[] sample1, final double[] sample2)
307         throws NullArgumentException, NumberIsTooSmallException {
308 
309         checkSampleData(sample1);
310         checkSampleData(sample2);
311         // No try-catch or advertised exception because args have just been checked
312         return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
313                  StatUtils.variance(sample1), StatUtils.variance(sample2),
314                  sample1.length, sample2.length);
315     }
316 
317     /**
318      * Computes a 2-sample t statistic, comparing the means of the datasets
319      * described by two {@link StatisticalSummary} instances, without the
320      * assumption of equal subpopulation variances.  Use
321      * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
322      * compute a t-statistic under the equal variances assumption.
323      * <p>
324      * This statistic can be used to perform a two-sample t-test to compare
325      * sample means.</p>
326      * <p>
327       * The returned  t-statistic is</p>
328      * <p>
329      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
330      * </p><p>
331      * where <strong><code>n1</code></strong> is the size of the first sample;
332      * <strong><code> n2</code></strong> is the size of the second sample;
333      * <strong><code> m1</code></strong> is the mean of the first sample;
334      * <strong><code> m2</code></strong> is the mean of the second sample
335      * <strong><code> var1</code></strong> is the variance of the first sample;
336      * <strong><code> var2</code></strong> is the variance of the second sample
337      * </p><p>
338      * <strong>Preconditions</strong>: <ul>
339      * <li>The datasets described by the two Univariates must each contain
340      * at least 2 observations.
341      * </li></ul>
342      *
343      * @param sampleStats1 StatisticalSummary describing data from the first sample
344      * @param sampleStats2 StatisticalSummary describing data from the second sample
345      * @return t statistic
346      * @throws NullArgumentException if the sample statistics are <code>null</code>
347      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
348      */
349     public double t(final StatisticalSummary sampleStats1,
350                     final StatisticalSummary sampleStats2)
351         throws NullArgumentException, NumberIsTooSmallException {
352 
353         checkSampleData(sampleStats1);
354         checkSampleData(sampleStats2);
355         return t(sampleStats1.getMean(), sampleStats2.getMean(),
356                  sampleStats1.getVariance(), sampleStats2.getVariance(),
357                  sampleStats1.getN(), sampleStats2.getN());
358     }
359 
360     /**
361      * Computes a 2-sample t statistic, comparing the means of the datasets
362      * described by two {@link StatisticalSummary} instances, under the
363      * assumption of equal subpopulation variances.  To compute a t-statistic
364      * without the equal variances assumption, use
365      * {@link #t(StatisticalSummary, StatisticalSummary)}.
366      * <p>
367      * This statistic can be used to perform a (homoscedastic) two-sample
368      * t-test to compare sample means.</p>
369      * <p>
370      * The t-statistic returned is</p>
371      * <p>
372      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
373      * </p><p>
374      * where <strong><code>n1</code></strong> is the size of first sample;
375      * <strong><code> n2</code></strong> is the size of second sample;
376      * <strong><code> m1</code></strong> is the mean of first sample;
377      * <strong><code> m2</code></strong> is the mean of second sample
378      * and <strong><code>var</code></strong> is the pooled variance estimate:
379      * </p><p>
380      * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
381      * </p><p>
382      * with <strong><code>var1</code></strong> the variance of the first sample and
383      * <strong><code>var2</code></strong> the variance of the second sample.
384      * </p><p>
385      * <strong>Preconditions</strong>: <ul>
386      * <li>The datasets described by the two Univariates must each contain
387      * at least 2 observations.
388      * </li></ul>
389      *
390      * @param sampleStats1 StatisticalSummary describing data from the first sample
391      * @param sampleStats2 StatisticalSummary describing data from the second sample
392      * @return t statistic
393      * @throws NullArgumentException if the sample statistics are <code>null</code>
394      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
395      */
396     public double homoscedasticT(final StatisticalSummary sampleStats1,
397                                  final StatisticalSummary sampleStats2)
398         throws NullArgumentException, NumberIsTooSmallException {
399 
400         checkSampleData(sampleStats1);
401         checkSampleData(sampleStats2);
402         return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
403                               sampleStats1.getVariance(), sampleStats2.getVariance(),
404                               sampleStats1.getN(), sampleStats2.getN());
405     }
406 
407     /**
408      * Returns the <i>observed significance level</i>, or
409      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
410      * comparing the mean of the input array with the constant <code>mu</code>.
411      * <p>
412      * The number returned is the smallest significance level
413      * at which one can reject the null hypothesis that the mean equals
414      * <code>mu</code> in favor of the two-sided alternative that the mean
415      * is different from <code>mu</code>. For a one-sided test, divide the
416      * returned value by 2.</p>
417      * <p>
418      * <strong>Usage Note:</strong><br>
419      * The validity of the test depends on the assumptions of the parametric
420      * t-test procedure, as discussed
421      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
422      * </p><p>
423      * <strong>Preconditions</strong>: <ul>
424      * <li>The observed array length must be at least 2.
425      * </li></ul>
426      *
427      * @param mu constant value to compare sample mean against
428      * @param sample array of sample data values
429      * @return p-value
430      * @throws NullArgumentException if the sample array is <code>null</code>
431      * @throws NumberIsTooSmallException if the length of the array is &lt; 2
432      * @throws MaxCountExceededException if an error occurs computing the p-value
433      */
434     public double tTest(final double mu, final double[] sample)
435         throws NullArgumentException, NumberIsTooSmallException,
436         MaxCountExceededException {
437 
438         checkSampleData(sample);
439         // No try-catch or advertised exception because args have just been checked
440         return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
441                      sample.length);
442     }
443 
444     /**
445      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
446      * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
447      * which <code>sample</code> is drawn equals <code>mu</code>.
448      * <p>
449      * Returns <code>true</code> iff the null hypothesis can be
450      * rejected with confidence <code>1 - alpha</code>.  To
451      * perform a 1-sided test, use <code>alpha * 2</code></p>
452      * <p>
453      * <strong>Examples:</strong><br><ol>
454      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
455      * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
456      * </li>
457      * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
458      * at the 99% level, first verify that the measured sample mean is less
459      * than <code>mu</code> and then use
460      * <br><code>tTest(mu, sample, 0.02) </code>
461      * </li></ol>
462      * <p>
463      * <strong>Usage Note:</strong><br>
464      * The validity of the test depends on the assumptions of the one-sample
465      * parametric t-test procedure, as discussed
466      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
467      * </p><p>
468      * <strong>Preconditions</strong>: <ul>
469      * <li>The observed array length must be at least 2.
470      * </li></ul>
471      *
472      * @param mu constant value to compare sample mean against
473      * @param sample array of sample data values
474      * @param alpha significance level of the test
475      * @return p-value
476      * @throws NullArgumentException if the sample array is <code>null</code>
477      * @throws NumberIsTooSmallException if the length of the array is &lt; 2
478      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
479      * @throws MaxCountExceededException if an error computing the p-value
480      */
481     public boolean tTest(final double mu, final double[] sample, final double alpha)
482         throws NullArgumentException, NumberIsTooSmallException,
483         OutOfRangeException, MaxCountExceededException {
484 
485         checkSignificanceLevel(alpha);
486         return tTest(mu, sample) < alpha;
487     }
488 
489     /**
490      * Returns the <i>observed significance level</i>, or
491      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
492      * comparing the mean of the dataset described by <code>sampleStats</code>
493      * with the constant <code>mu</code>.
494      * <p>
495      * The number returned is the smallest significance level
496      * at which one can reject the null hypothesis that the mean equals
497      * <code>mu</code> in favor of the two-sided alternative that the mean
498      * is different from <code>mu</code>. For a one-sided test, divide the
499      * returned value by 2.</p>
500      * <p>
501      * <strong>Usage Note:</strong><br>
502      * The validity of the test depends on the assumptions of the parametric
503      * t-test procedure, as discussed
504      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
505      * here</a></p>
506      * <p>
507      * <strong>Preconditions</strong>: <ul>
508      * <li>The sample must contain at least 2 observations.
509      * </li></ul>
510      *
511      * @param mu constant value to compare sample mean against
512      * @param sampleStats StatisticalSummary describing sample data
513      * @return p-value
514      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
515      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
516      * @throws MaxCountExceededException if an error occurs computing the p-value
517      */
518     public double tTest(final double mu, final StatisticalSummary sampleStats)
519         throws NullArgumentException, NumberIsTooSmallException,
520         MaxCountExceededException {
521 
522         checkSampleData(sampleStats);
523         return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
524                      sampleStats.getN());
525     }
526 
527     /**
528      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
529      * two-sided t-test</a> evaluating the null hypothesis that the mean of the
530      * population from which the dataset described by <code>stats</code> is
531      * drawn equals <code>mu</code>.
532      * <p>
533      * Returns <code>true</code> iff the null hypothesis can be rejected with
534      * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
535      * <code>alpha * 2.</code></p>
536      * <p>
537      * <strong>Examples:</strong><br><ol>
538      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
539      * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
540      * </li>
541      * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
542      * at the 99% level, first verify that the measured sample mean is less
543      * than <code>mu</code> and then use
544      * <br><code>tTest(mu, sampleStats, 0.02) </code>
545      * </li></ol>
546      * <p>
547      * <strong>Usage Note:</strong><br>
548      * The validity of the test depends on the assumptions of the one-sample
549      * parametric t-test procedure, as discussed
550      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
551      * </p><p>
552      * <strong>Preconditions</strong>: <ul>
553      * <li>The sample must include at least 2 observations.
554      * </li></ul>
555      *
556      * @param mu constant value to compare sample mean against
557      * @param sampleStats StatisticalSummary describing sample data values
558      * @param alpha significance level of the test
559      * @return p-value
560      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
561      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
562      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
563      * @throws MaxCountExceededException if an error occurs computing the p-value
564      */
565     public boolean tTest(final double mu, final StatisticalSummary sampleStats,
566                          final double alpha)
567     throws NullArgumentException, NumberIsTooSmallException,
568     OutOfRangeException, MaxCountExceededException {
569 
570         checkSignificanceLevel(alpha);
571         return tTest(mu, sampleStats) < alpha;
572     }
573 
574     /**
575      * Returns the <i>observed significance level</i>, or
576      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
577      * comparing the means of the input arrays.
578      * <p>
579      * The number returned is the smallest significance level
580      * at which one can reject the null hypothesis that the two means are
581      * equal in favor of the two-sided alternative that they are different.
582      * For a one-sided test, divide the returned value by 2.</p>
583      * <p>
584      * The test does not assume that the underlying popuation variances are
585      * equal  and it uses approximated degrees of freedom computed from the
586      * sample data to compute the p-value.  The t-statistic used is as defined in
587      * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
588      * to the degrees of freedom is used,
589      * as described
590      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
591      * here.</a>  To perform the test under the assumption of equal subpopulation
592      * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
593      * <p>
594      * <strong>Usage Note:</strong><br>
595      * The validity of the p-value depends on the assumptions of the parametric
596      * t-test procedure, as discussed
597      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
598      * here</a></p>
599      * <p>
600      * <strong>Preconditions</strong>: <ul>
601      * <li>The observed array lengths must both be at least 2.
602      * </li></ul>
603      *
604      * @param sample1 array of sample data values
605      * @param sample2 array of sample data values
606      * @return p-value for t-test
607      * @throws NullArgumentException if the arrays are <code>null</code>
608      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
609      * @throws MaxCountExceededException if an error occurs computing the p-value
610      */
611     public double tTest(final double[] sample1, final double[] sample2)
612         throws NullArgumentException, NumberIsTooSmallException,
613         MaxCountExceededException {
614 
615         checkSampleData(sample1);
616         checkSampleData(sample2);
617         // No try-catch or advertised exception because args have just been checked
618         return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
619                      StatUtils.variance(sample1), StatUtils.variance(sample2),
620                      sample1.length, sample2.length);
621     }
622 
623     /**
624      * Returns the <i>observed significance level</i>, or
625      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
626      * comparing the means of the input arrays, under the assumption that
627      * the two samples are drawn from subpopulations with equal variances.
628      * To perform the test without the equal variances assumption, use
629      * {@link #tTest(double[], double[])}.
630      * <p>
631      * The number returned is the smallest significance level
632      * at which one can reject the null hypothesis that the two means are
633      * equal in favor of the two-sided alternative that they are different.
634      * For a one-sided test, divide the returned value by 2.</p>
635      * <p>
636      * A pooled variance estimate is used to compute the t-statistic.  See
637      * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
638      * minus 2 is used as the degrees of freedom.</p>
639      * <p>
640      * <strong>Usage Note:</strong><br>
641      * The validity of the p-value depends on the assumptions of the parametric
642      * t-test procedure, as discussed
643      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
644      * here</a></p>
645      * <p>
646      * <strong>Preconditions</strong>: <ul>
647      * <li>The observed array lengths must both be at least 2.
648      * </li></ul>
649      *
650      * @param sample1 array of sample data values
651      * @param sample2 array of sample data values
652      * @return p-value for t-test
653      * @throws NullArgumentException if the arrays are <code>null</code>
654      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
655      * @throws MaxCountExceededException if an error occurs computing the p-value
656      */
657     public double homoscedasticTTest(final double[] sample1, final double[] sample2)
658         throws NullArgumentException, NumberIsTooSmallException,
659         MaxCountExceededException {
660 
661         checkSampleData(sample1);
662         checkSampleData(sample2);
663         // No try-catch or advertised exception because args have just been checked
664         return homoscedasticTTest(StatUtils.mean(sample1),
665                                   StatUtils.mean(sample2),
666                                   StatUtils.variance(sample1),
667                                   StatUtils.variance(sample2),
668                                   sample1.length, sample2.length);
669     }
670 
671     /**
672      * Performs a
673      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
674      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
675      * and <code>sample2</code> are drawn from populations with the same mean,
676      * with significance level <code>alpha</code>.  This test does not assume
677      * that the subpopulation variances are equal.  To perform the test assuming
678      * equal variances, use
679      * {@link #homoscedasticTTest(double[], double[], double)}.
680      * <p>
681      * Returns <code>true</code> iff the null hypothesis that the means are
682      * equal can be rejected with confidence <code>1 - alpha</code>.  To
683      * perform a 1-sided test, use <code>alpha * 2</code></p>
684      * <p>
685      * See {@link #t(double[], double[])} for the formula used to compute the
686      * t-statistic.  Degrees of freedom are approximated using the
687      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
688      * Welch-Satterthwaite approximation.</a></p>
689      * <p>
690      * <strong>Examples:</strong><br><ol>
691      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
692      * the 95% level,  use
693      * <br><code>tTest(sample1, sample2, 0.05). </code>
694      * </li>
695      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>,
696      * at the 99% level, first verify that the measured  mean of <code>sample 1</code>
697      * is less than the mean of <code>sample 2</code> and then use
698      * <br><code>tTest(sample1, sample2, 0.02) </code>
699      * </li></ol>
700      * <p>
701      * <strong>Usage Note:</strong><br>
702      * The validity of the test depends on the assumptions of the parametric
703      * t-test procedure, as discussed
704      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
705      * here</a></p>
706      * <p>
707      * <strong>Preconditions</strong>: <ul>
708      * <li>The observed array lengths must both be at least 2.
709      * </li>
710      * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
711      * </li></ul>
712      *
713      * @param sample1 array of sample data values
714      * @param sample2 array of sample data values
715      * @param alpha significance level of the test
716      * @return true if the null hypothesis can be rejected with
717      * confidence 1 - alpha
718      * @throws NullArgumentException if the arrays are <code>null</code>
719      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
720      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
721      * @throws MaxCountExceededException if an error occurs computing the p-value
722      */
723     public boolean tTest(final double[] sample1, final double[] sample2,
724                          final double alpha)
725         throws NullArgumentException, NumberIsTooSmallException,
726         OutOfRangeException, MaxCountExceededException {
727 
728         checkSignificanceLevel(alpha);
729         return tTest(sample1, sample2) < alpha;
730     }
731 
732     /**
733      * Performs a
734      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
735      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
736      * and <code>sample2</code> are drawn from populations with the same mean,
737      * with significance level <code>alpha</code>,  assuming that the
738      * subpopulation variances are equal.  Use
739      * {@link #tTest(double[], double[], double)} to perform the test without
740      * the assumption of equal variances.
741      * <p>
742      * Returns <code>true</code> iff the null hypothesis that the means are
743      * equal can be rejected with confidence <code>1 - alpha</code>.  To
744      * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
745      * without the assumption of equal subpopulation variances, use
746      * {@link #tTest(double[], double[], double)}.</p>
747      * <p>
748      * A pooled variance estimate is used to compute the t-statistic. See
749      * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the
750      * sample sizes minus 2 is used as the degrees of freedom.</p>
751      * <p>
752      * <strong>Examples:</strong><br><ol>
753      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
754      * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code>
755      * </li>
756      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2, </code>
757      * at the 99% level, first verify that the measured mean of
758      * <code>sample 1</code> is less than the mean of <code>sample 2</code>
759      * and then use
760      * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
761      * </li></ol>
762      * <p>
763      * <strong>Usage Note:</strong><br>
764      * The validity of the test depends on the assumptions of the parametric
765      * t-test procedure, as discussed
766      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
767      * here</a></p>
768      * <p>
769      * <strong>Preconditions</strong>: <ul>
770      * <li>The observed array lengths must both be at least 2.
771      * </li>
772      * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
773      * </li></ul>
774      *
775      * @param sample1 array of sample data values
776      * @param sample2 array of sample data values
777      * @param alpha significance level of the test
778      * @return true if the null hypothesis can be rejected with
779      * confidence 1 - alpha
780      * @throws NullArgumentException if the arrays are <code>null</code>
781      * @throws NumberIsTooSmallException if the length of the arrays is &lt; 2
782      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
783      * @throws MaxCountExceededException if an error occurs computing the p-value
784      */
785     public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
786                                       final double alpha)
787         throws NullArgumentException, NumberIsTooSmallException,
788         OutOfRangeException, MaxCountExceededException {
789 
790         checkSignificanceLevel(alpha);
791         return homoscedasticTTest(sample1, sample2) < alpha;
792     }
793 
794     /**
795      * Returns the <i>observed significance level</i>, or
796      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
797      * comparing the means of the datasets described by two StatisticalSummary
798      * instances.
799      * <p>
800      * The number returned is the smallest significance level
801      * at which one can reject the null hypothesis that the two means are
802      * equal in favor of the two-sided alternative that they are different.
803      * For a one-sided test, divide the returned value by 2.</p>
804      * <p>
805      * The test does not assume that the underlying population variances are
806      * equal  and it uses approximated degrees of freedom computed from the
807      * sample data to compute the p-value.   To perform the test assuming
808      * equal variances, use
809      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
810      * <p>
811      * <strong>Usage Note:</strong><br>
812      * The validity of the p-value depends on the assumptions of the parametric
813      * t-test procedure, as discussed
814      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
815      * here</a></p>
816      * <p>
817      * <strong>Preconditions</strong>: <ul>
818      * <li>The datasets described by the two StatisticalSummary instances must each contain
819      * at least 2 observations.
820      * </li></ul>
821      *
822      * @param sampleStats1  StatisticalSummary describing data from the first sample
823      * @param sampleStats2  StatisticalSummary describing data from the second sample
824      * @return p-value for t-test
825      * @throws NullArgumentException if the sample statistics are <code>null</code>
826      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
827      * @throws MaxCountExceededException if an error occurs computing the p-value
828      */
829     public double tTest(final StatisticalSummary sampleStats1,
830                         final StatisticalSummary sampleStats2)
831         throws NullArgumentException, NumberIsTooSmallException,
832         MaxCountExceededException {
833 
834         checkSampleData(sampleStats1);
835         checkSampleData(sampleStats2);
836         return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
837                      sampleStats1.getVariance(), sampleStats2.getVariance(),
838                      sampleStats1.getN(), sampleStats2.getN());
839     }
840 
841     /**
842      * Returns the <i>observed significance level</i>, or
843      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
844      * comparing the means of the datasets described by two StatisticalSummary
845      * instances, under the hypothesis of equal subpopulation variances. To
846      * perform a test without the equal variances assumption, use
847      * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
848      * <p>
849      * The number returned is the smallest significance level
850      * at which one can reject the null hypothesis that the two means are
851      * equal in favor of the two-sided alternative that they are different.
852      * For a one-sided test, divide the returned value by 2.</p>
853      * <p>
854      * See {@link #homoscedasticT(double[], double[])} for the formula used to
855      * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
856      * the degrees of freedom.</p>
857      * <p>
858      * <strong>Usage Note:</strong><br>
859      * The validity of the p-value depends on the assumptions of the parametric
860      * t-test procedure, as discussed
861      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
862      * </p><p>
863      * <strong>Preconditions</strong>: <ul>
864      * <li>The datasets described by the two StatisticalSummary instances must each contain
865      * at least 2 observations.
866      * </li></ul>
867      *
868      * @param sampleStats1  StatisticalSummary describing data from the first sample
869      * @param sampleStats2  StatisticalSummary describing data from the second sample
870      * @return p-value for t-test
871      * @throws NullArgumentException if the sample statistics are <code>null</code>
872      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
873      * @throws MaxCountExceededException if an error occurs computing the p-value
874      */
875     public double homoscedasticTTest(final StatisticalSummary sampleStats1,
876                                      final StatisticalSummary sampleStats2)
877         throws NullArgumentException, NumberIsTooSmallException,
878         MaxCountExceededException {
879 
880         checkSampleData(sampleStats1);
881         checkSampleData(sampleStats2);
882         return homoscedasticTTest(sampleStats1.getMean(),
883                                   sampleStats2.getMean(),
884                                   sampleStats1.getVariance(),
885                                   sampleStats2.getVariance(),
886                                   sampleStats1.getN(), sampleStats2.getN());
887     }
888 
889     /**
890      * Performs a
891      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
892      * two-sided t-test</a> evaluating the null hypothesis that
893      * <code>sampleStats1</code> and <code>sampleStats2</code> describe
894      * datasets drawn from populations with the same mean, with significance
895      * level <code>alpha</code>.   This test does not assume that the
896      * subpopulation variances are equal.  To perform the test under the equal
897      * variances assumption, use
898      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
899      * <p>
900      * Returns <code>true</code> iff the null hypothesis that the means are
901      * equal can be rejected with confidence <code>1 - alpha</code>.  To
902      * perform a 1-sided test, use <code>alpha * 2</code></p>
903      * <p>
904      * See {@link #t(double[], double[])} for the formula used to compute the
905      * t-statistic.  Degrees of freedom are approximated using the
906      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
907      * Welch-Satterthwaite approximation.</a></p>
908      * <p>
909      * <strong>Examples:</strong><br><ol>
910      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
911      * the 95% level, use
912      * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
913      * </li>
914      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
915      * at the 99% level,  first verify that the measured mean of
916      * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
917      * and then use
918      * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
919      * </li></ol>
920      * <p>
921      * <strong>Usage Note:</strong><br>
922      * The validity of the test depends on the assumptions of the parametric
923      * t-test procedure, as discussed
924      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
925      * here</a></p>
926      * <p>
927      * <strong>Preconditions</strong>: <ul>
928      * <li>The datasets described by the two StatisticalSummary instances must each contain
929      * at least 2 observations.
930      * </li>
931      * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
932      * </li></ul>
933      *
934      * @param sampleStats1 StatisticalSummary describing sample data values
935      * @param sampleStats2 StatisticalSummary describing sample data values
936      * @param alpha significance level of the test
937      * @return true if the null hypothesis can be rejected with
938      * confidence 1 - alpha
939      * @throws NullArgumentException if the sample statistics are <code>null</code>
940      * @throws NumberIsTooSmallException if the number of samples is &lt; 2
941      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
942      * @throws MaxCountExceededException if an error occurs computing the p-value
943      */
944     public boolean tTest(final StatisticalSummary sampleStats1,
945                          final StatisticalSummary sampleStats2,
946                          final double alpha)
947         throws NullArgumentException, NumberIsTooSmallException,
948         OutOfRangeException, MaxCountExceededException {
949 
950         checkSignificanceLevel(alpha);
951         return tTest(sampleStats1, sampleStats2) < alpha;
952     }
953 
954     //----------------------------------------------- Protected methods
955 
956     /**
957      * Computes approximate degrees of freedom for 2-sample t-test.
958      *
959      * @param v1 first sample variance
960      * @param v2 second sample variance
961      * @param n1 first sample n
962      * @param n2 second sample n
963      * @return approximate degrees of freedom
964      */
965     protected double df(double v1, double v2, double n1, double n2) {
966         return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
967         ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
968                 (n2 * n2 * (n2 - 1d)));
969     }
970 
971     /**
972      * Computes t test statistic for 1-sample t-test.
973      *
974      * @param m sample mean
975      * @param mu constant to test against
976      * @param v sample variance
977      * @param n sample n
978      * @return t test statistic
979      */
980     protected double t(final double m, final double mu,
981                        final double v, final double n) {
982         return (m - mu) / JdkMath.sqrt(v / n);
983     }
984 
985     /**
986      * Computes t test statistic for 2-sample t-test.
987      * <p>
988      * Does not assume that subpopulation variances are equal.</p>
989      *
990      * @param m1 first sample mean
991      * @param m2 second sample mean
992      * @param v1 first sample variance
993      * @param v2 second sample variance
994      * @param n1 first sample n
995      * @param n2 second sample n
996      * @return t test statistic
997      */
998     protected double t(final double m1, final double m2,
999                        final double v1, final double v2,
1000                        final double n1, final double n2)  {
1001         return (m1 - m2) / JdkMath.sqrt((v1 / n1) + (v2 / n2));
1002     }
1003 
1004     /**
1005      * Computes t test statistic for 2-sample t-test under the hypothesis
1006      * of equal subpopulation variances.
1007      *
1008      * @param m1 first sample mean
1009      * @param m2 second sample mean
1010      * @param v1 first sample variance
1011      * @param v2 second sample variance
1012      * @param n1 first sample n
1013      * @param n2 second sample n
1014      * @return t test statistic
1015      */
1016     protected double homoscedasticT(final double m1, final double m2,
1017                                     final double v1, final double v2,
1018                                     final double n1, final double n2)  {
1019         final double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1020         return (m1 - m2) / JdkMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1021     }
1022 
1023     /**
1024      * Computes p-value for 2-sided, 1-sample t-test.
1025      *
1026      * @param m sample mean
1027      * @param mu constant to test against
1028      * @param v sample variance
1029      * @param n sample n
1030      * @return p-value
1031      * @throws MaxCountExceededException if an error occurs computing the p-value
1032      * @throws MathIllegalArgumentException if n is not greater than 1
1033      */
1034     protected double tTest(final double m, final double mu,
1035                            final double v, final double n)
1036         throws MaxCountExceededException, MathIllegalArgumentException {
1037 
1038         final double t = JdkMath.abs(t(m, mu, v, n));
1039         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1040         final TDistribution distribution = TDistribution.of(n - 1);
1041         return 2.0 * distribution.cumulativeProbability(-t);
1042     }
1043 
1044     /**
1045      * Computes p-value for 2-sided, 2-sample t-test.
1046      * <p>
1047      * Does not assume subpopulation variances are equal. Degrees of freedom
1048      * are estimated from the data.</p>
1049      *
1050      * @param m1 first sample mean
1051      * @param m2 second sample mean
1052      * @param v1 first sample variance
1053      * @param v2 second sample variance
1054      * @param n1 first sample n
1055      * @param n2 second sample n
1056      * @return p-value
1057      * @throws MaxCountExceededException if an error occurs computing the p-value
1058      * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1059      * strictly positive
1060      */
1061     protected double tTest(final double m1, final double m2,
1062                            final double v1, final double v2,
1063                            final double n1, final double n2)
1064         throws MaxCountExceededException, NotStrictlyPositiveException {
1065 
1066         final double t = JdkMath.abs(t(m1, m2, v1, v2, n1, n2));
1067         final double degreesOfFreedom = df(v1, v2, n1, n2);
1068         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1069         final TDistribution distribution = TDistribution.of(degreesOfFreedom);
1070         return 2.0 * distribution.cumulativeProbability(-t);
1071     }
1072 
1073     /**
1074      * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1075      * of equal subpopulation variances.
1076      * <p>
1077      * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1078      *
1079      * @param m1 first sample mean
1080      * @param m2 second sample mean
1081      * @param v1 first sample variance
1082      * @param v2 second sample variance
1083      * @param n1 first sample n
1084      * @param n2 second sample n
1085      * @return p-value
1086      * @throws MaxCountExceededException if an error occurs computing the p-value
1087      * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1088      * strictly positive
1089      */
1090     protected double homoscedasticTTest(double m1, double m2,
1091                                         double v1, double v2,
1092                                         double n1, double n2)
1093         throws MaxCountExceededException, NotStrictlyPositiveException {
1094 
1095         final double t = JdkMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1096         final double degreesOfFreedom = n1 + n2 - 2;
1097         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1098         final TDistribution distribution = TDistribution.of(degreesOfFreedom);
1099         return 2.0 * distribution.cumulativeProbability(-t);
1100     }
1101 
1102     /**
1103      * Check significance level.
1104      *
1105      * @param alpha significance level
1106      * @throws OutOfRangeException if the significance level is out of bounds.
1107      */
1108     private void checkSignificanceLevel(final double alpha)
1109         throws OutOfRangeException {
1110 
1111         if (alpha <= 0 || alpha > 0.5) {
1112             throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
1113                                           alpha, 0.0, 0.5);
1114         }
1115     }
1116 
1117     /**
1118      * Check sample data.
1119      *
1120      * @param data Sample data.
1121      * @throws NullArgumentException if {@code data} is {@code null}.
1122      * @throws NumberIsTooSmallException if there is not enough sample data.
1123      */
1124     private void checkSampleData(final double[] data)
1125         throws NullArgumentException, NumberIsTooSmallException {
1126 
1127         if (data == null) {
1128             throw new NullArgumentException();
1129         }
1130         if (data.length < 2) {
1131             throw new NumberIsTooSmallException(
1132                     LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1133                     data.length, 2, true);
1134         }
1135     }
1136 
1137     /**
1138      * Check sample data.
1139      *
1140      * @param stat Statistical summary.
1141      * @throws NullArgumentException if {@code stat} is {@code null}.
1142      * @throws NumberIsTooSmallException if there is not enough sample data.
1143      */
1144     private void checkSampleData(final StatisticalSummary stat)
1145         throws NullArgumentException, NumberIsTooSmallException {
1146 
1147         if (stat == null) {
1148             throw new NullArgumentException();
1149         }
1150         if (stat.getN() < 2) {
1151             throw new NumberIsTooSmallException(
1152                     LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1153                     stat.getN(), 2, true);
1154         }
1155     }
1156 }