1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat.inference;
18
19 import org.apache.commons.statistics.distribution.TDistribution;
20 import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
21 import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
22 import org.apache.commons.math4.legacy.exception.MaxCountExceededException;
23 import org.apache.commons.math4.legacy.exception.NoDataException;
24 import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
25 import org.apache.commons.math4.legacy.exception.NullArgumentException;
26 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
27 import org.apache.commons.math4.legacy.exception.OutOfRangeException;
28 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
29 import org.apache.commons.math4.legacy.stat.StatUtils;
30 import org.apache.commons.math4.legacy.stat.descriptive.StatisticalSummary;
31 import org.apache.commons.math4.core.jdkmath.JdkMath;
32
33 /**
34 * An implementation for Student's t-tests.
35 * <p>
36 * Tests can be:<ul>
37 * <li>One-sample or two-sample</li>
38 * <li>One-sided or two-sided</li>
39 * <li>Paired or unpaired (for two-sample tests)</li>
40 * <li>Homoscedastic (equal variance assumption) or heteroscedastic
41 * (for two sample tests)</li>
42 * <li>Fixed significance level (boolean-valued) or returning p-values.
43 * </li></ul>
44 * <p>
 * Test statistics are available for all tests. Methods including "Test" in
 * their names perform tests, all other methods return t-statistics. Among
47 * the "Test" methods, <code>double-</code>valued methods return p-values;
48 * <code>boolean-</code>valued methods perform fixed significance level tests.
49 * Significance levels are always specified as numbers between 0 and 0.5
50 * (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p>
51 * <p>
52 * Input to tests can be either <code>double[]</code> arrays or
53 * {@link StatisticalSummary} instances.</p><p>
 * Uses the Apache Commons Statistics {@link org.apache.commons.statistics.distribution.TDistribution}
 * implementation to compute p-values.</p>
56 *
57 */
58 public class TTest {
59 /**
60 * Computes a paired, 2-sample t-statistic based on the data in the input
61 * arrays. The t-statistic returned is equivalent to what would be returned by
62 * computing the one-sample t-statistic {@link #t(double, double[])}, with
63 * <code>mu = 0</code> and the sample array consisting of the (signed)
64 * differences between corresponding entries in <code>sample1</code> and
65 * <code>sample2.</code>
66 * <p>
67 * <strong>Preconditions</strong>: <ul>
68 * <li>The input arrays must have the same length and their common length
69 * must be at least 2.
70 * </li></ul>
71 *
72 * @param sample1 array of sample data values
73 * @param sample2 array of sample data values
74 * @return t statistic
75 * @throws NullArgumentException if the arrays are <code>null</code>
76 * @throws NoDataException if the arrays are empty
77 * @throws DimensionMismatchException if the length of the arrays is not equal
78 * @throws NumberIsTooSmallException if the length of the arrays is < 2
79 */
80 public double pairedT(final double[] sample1, final double[] sample2)
81 throws NullArgumentException, NoDataException,
82 DimensionMismatchException, NumberIsTooSmallException {
83
84 checkSampleData(sample1);
85 checkSampleData(sample2);
86 double meanDifference = StatUtils.meanDifference(sample1, sample2);
87 return t(meanDifference, 0,
88 StatUtils.varianceDifference(sample1, sample2, meanDifference),
89 sample1.length);
90 }
91
92 /**
93 * Returns the <i>observed significance level</i>, or
94 * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
95 * based on the data in the input arrays.
96 * <p>
97 * The number returned is the smallest significance level
98 * at which one can reject the null hypothesis that the mean of the paired
99 * differences is 0 in favor of the two-sided alternative that the mean paired
100 * difference is not equal to 0. For a one-sided test, divide the returned
101 * value by 2.</p>
102 * <p>
103 * This test is equivalent to a one-sample t-test computed using
104 * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
105 * array consisting of the signed differences between corresponding elements of
106 * <code>sample1</code> and <code>sample2.</code></p>
107 * <p>
108 * <strong>Usage Note:</strong><br>
109 * The validity of the p-value depends on the assumptions of the parametric
110 * t-test procedure, as discussed
111 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
112 * here</a></p>
113 * <p>
114 * <strong>Preconditions</strong>: <ul>
115 * <li>The input array lengths must be the same and their common length must
116 * be at least 2.
117 * </li></ul>
118 *
119 * @param sample1 array of sample data values
120 * @param sample2 array of sample data values
121 * @return p-value for t-test
122 * @throws NullArgumentException if the arrays are <code>null</code>
123 * @throws NoDataException if the arrays are empty
124 * @throws DimensionMismatchException if the length of the arrays is not equal
125 * @throws NumberIsTooSmallException if the length of the arrays is < 2
126 * @throws MaxCountExceededException if an error occurs computing the p-value
127 */
128 public double pairedTTest(final double[] sample1, final double[] sample2)
129 throws NullArgumentException, NoDataException, DimensionMismatchException,
130 NumberIsTooSmallException, MaxCountExceededException {
131
132 double meanDifference = StatUtils.meanDifference(sample1, sample2);
133 return tTest(meanDifference, 0,
134 StatUtils.varianceDifference(sample1, sample2, meanDifference),
135 sample1.length);
136 }
137
138 /**
139 * Performs a paired t-test evaluating the null hypothesis that the
140 * mean of the paired differences between <code>sample1</code> and
141 * <code>sample2</code> is 0 in favor of the two-sided alternative that the
142 * mean paired difference is not equal to 0, with significance level
143 * <code>alpha</code>.
144 * <p>
145 * Returns <code>true</code> iff the null hypothesis can be rejected with
146 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
147 * <code>alpha * 2</code></p>
148 * <p>
149 * <strong>Usage Note:</strong><br>
150 * The validity of the test depends on the assumptions of the parametric
151 * t-test procedure, as discussed
152 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
153 * here</a></p>
154 * <p>
155 * <strong>Preconditions</strong>: <ul>
156 * <li>The input array lengths must be the same and their common length
157 * must be at least 2.
158 * </li>
 * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
160 * </li></ul>
161 *
162 * @param sample1 array of sample data values
163 * @param sample2 array of sample data values
164 * @param alpha significance level of the test
165 * @return true if the null hypothesis can be rejected with
166 * confidence 1 - alpha
167 * @throws NullArgumentException if the arrays are <code>null</code>
168 * @throws NoDataException if the arrays are empty
169 * @throws DimensionMismatchException if the length of the arrays is not equal
170 * @throws NumberIsTooSmallException if the length of the arrays is < 2
171 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
172 * @throws MaxCountExceededException if an error occurs computing the p-value
173 */
174 public boolean pairedTTest(final double[] sample1, final double[] sample2,
175 final double alpha)
176 throws NullArgumentException, NoDataException, DimensionMismatchException,
177 NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {
178
179 checkSignificanceLevel(alpha);
180 return pairedTTest(sample1, sample2) < alpha;
181 }
182
183 /**
184 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
185 * t statistic </a> given observed values and a comparison constant.
186 * <p>
187 * This statistic can be used to perform a one sample t-test for the mean.
188 * </p><p>
189 * <strong>Preconditions</strong>: <ul>
190 * <li>The observed array length must be at least 2.
191 * </li></ul>
192 *
193 * @param mu comparison constant
194 * @param observed array of values
195 * @return t statistic
196 * @throws NullArgumentException if <code>observed</code> is <code>null</code>
197 * @throws NumberIsTooSmallException if the length of <code>observed</code> is < 2
198 */
199 public double t(final double mu, final double[] observed)
200 throws NullArgumentException, NumberIsTooSmallException {
201
202 checkSampleData(observed);
203 // No try-catch or advertised exception because args have just been checked
204 return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
205 observed.length);
206 }
207
208 /**
209 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
210 * t statistic </a> to use in comparing the mean of the dataset described by
211 * <code>sampleStats</code> to <code>mu</code>.
212 * <p>
213 * This statistic can be used to perform a one sample t-test for the mean.
214 * </p><p>
215 * <strong>Preconditions</strong>: <ul>
 * <li><code>sampleStats.getN() ≥ 2</code>.
217 * </li></ul>
218 *
219 * @param mu comparison constant
 * @param sampleStats StatisticalSummary holding sample summary statistics
221 * @return t statistic
222 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
223 * @throws NumberIsTooSmallException if the number of samples is < 2
224 */
225 public double t(final double mu, final StatisticalSummary sampleStats)
226 throws NullArgumentException, NumberIsTooSmallException {
227
228 checkSampleData(sampleStats);
229 return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
230 sampleStats.getN());
231 }
232
233 /**
234 * Computes a 2-sample t statistic, under the hypothesis of equal
235 * subpopulation variances. To compute a t-statistic without the
236 * equal variances hypothesis, use {@link #t(double[], double[])}.
237 * <p>
238 * This statistic can be used to perform a (homoscedastic) two-sample
239 * t-test to compare sample means.</p>
240 * <p>
241 * The t-statistic is</p>
242 * <p>
243 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
244 * </p><p>
245 * where <strong><code>n1</code></strong> is the size of first sample;
246 * <strong><code> n2</code></strong> is the size of second sample;
247 * <strong><code> m1</code></strong> is the mean of first sample;
248 * <strong><code> m2</code></strong> is the mean of second sample
249 * and <strong><code>var</code></strong> is the pooled variance estimate:
250 * </p><p>
 * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
252 * </p><p>
253 * with <strong><code>var1</code></strong> the variance of the first sample and
254 * <strong><code>var2</code></strong> the variance of the second sample.
255 * </p><p>
256 * <strong>Preconditions</strong>: <ul>
257 * <li>The observed array lengths must both be at least 2.
258 * </li></ul>
259 *
260 * @param sample1 array of sample data values
261 * @param sample2 array of sample data values
262 * @return t statistic
263 * @throws NullArgumentException if the arrays are <code>null</code>
264 * @throws NumberIsTooSmallException if the length of the arrays is < 2
265 */
266 public double homoscedasticT(final double[] sample1, final double[] sample2)
267 throws NullArgumentException, NumberIsTooSmallException {
268
269 checkSampleData(sample1);
270 checkSampleData(sample2);
271 // No try-catch or advertised exception because args have just been checked
272 return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
273 StatUtils.variance(sample1), StatUtils.variance(sample2),
274 sample1.length, sample2.length);
275 }
276
277 /**
278 * Computes a 2-sample t statistic, without the hypothesis of equal
279 * subpopulation variances. To compute a t-statistic assuming equal
280 * variances, use {@link #homoscedasticT(double[], double[])}.
281 * <p>
282 * This statistic can be used to perform a two-sample t-test to compare
283 * sample means.</p>
284 * <p>
285 * The t-statistic is</p>
286 * <p>
287 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
288 * </p><p>
 * where <strong><code>n1</code></strong> is the size of the first sample;
290 * <strong><code> n2</code></strong> is the size of the second sample;
291 * <strong><code> m1</code></strong> is the mean of the first sample;
292 * <strong><code> m2</code></strong> is the mean of the second sample;
293 * <strong><code> var1</code></strong> is the variance of the first sample;
294 * <strong><code> var2</code></strong> is the variance of the second sample;
295 * </p><p>
296 * <strong>Preconditions</strong>: <ul>
297 * <li>The observed array lengths must both be at least 2.
298 * </li></ul>
299 *
300 * @param sample1 array of sample data values
301 * @param sample2 array of sample data values
302 * @return t statistic
303 * @throws NullArgumentException if the arrays are <code>null</code>
304 * @throws NumberIsTooSmallException if the length of the arrays is < 2
305 */
306 public double t(final double[] sample1, final double[] sample2)
307 throws NullArgumentException, NumberIsTooSmallException {
308
309 checkSampleData(sample1);
310 checkSampleData(sample2);
311 // No try-catch or advertised exception because args have just been checked
312 return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
313 StatUtils.variance(sample1), StatUtils.variance(sample2),
314 sample1.length, sample2.length);
315 }
316
317 /**
318 * Computes a 2-sample t statistic, comparing the means of the datasets
319 * described by two {@link StatisticalSummary} instances, without the
320 * assumption of equal subpopulation variances. Use
321 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
322 * compute a t-statistic under the equal variances assumption.
323 * <p>
324 * This statistic can be used to perform a two-sample t-test to compare
325 * sample means.</p>
326 * <p>
327 * The returned t-statistic is</p>
328 * <p>
329 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
330 * </p><p>
331 * where <strong><code>n1</code></strong> is the size of the first sample;
332 * <strong><code> n2</code></strong> is the size of the second sample;
333 * <strong><code> m1</code></strong> is the mean of the first sample;
334 * <strong><code> m2</code></strong> is the mean of the second sample
335 * <strong><code> var1</code></strong> is the variance of the first sample;
336 * <strong><code> var2</code></strong> is the variance of the second sample
337 * </p><p>
338 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two {@link StatisticalSummary}
 * instances must each contain at least 2 observations.
341 * </li></ul>
342 *
343 * @param sampleStats1 StatisticalSummary describing data from the first sample
344 * @param sampleStats2 StatisticalSummary describing data from the second sample
345 * @return t statistic
346 * @throws NullArgumentException if the sample statistics are <code>null</code>
347 * @throws NumberIsTooSmallException if the number of samples is < 2
348 */
349 public double t(final StatisticalSummary sampleStats1,
350 final StatisticalSummary sampleStats2)
351 throws NullArgumentException, NumberIsTooSmallException {
352
353 checkSampleData(sampleStats1);
354 checkSampleData(sampleStats2);
355 return t(sampleStats1.getMean(), sampleStats2.getMean(),
356 sampleStats1.getVariance(), sampleStats2.getVariance(),
357 sampleStats1.getN(), sampleStats2.getN());
358 }
359
360 /**
361 * Computes a 2-sample t statistic, comparing the means of the datasets
362 * described by two {@link StatisticalSummary} instances, under the
363 * assumption of equal subpopulation variances. To compute a t-statistic
364 * without the equal variances assumption, use
365 * {@link #t(StatisticalSummary, StatisticalSummary)}.
366 * <p>
367 * This statistic can be used to perform a (homoscedastic) two-sample
368 * t-test to compare sample means.</p>
369 * <p>
370 * The t-statistic returned is</p>
371 * <p>
372 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
373 * </p><p>
374 * where <strong><code>n1</code></strong> is the size of first sample;
375 * <strong><code> n2</code></strong> is the size of second sample;
376 * <strong><code> m1</code></strong> is the mean of first sample;
377 * <strong><code> m2</code></strong> is the mean of second sample
378 * and <strong><code>var</code></strong> is the pooled variance estimate:
379 * </p><p>
 * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
381 * </p><p>
382 * with <strong><code>var1</code></strong> the variance of the first sample and
383 * <strong><code>var2</code></strong> the variance of the second sample.
384 * </p><p>
385 * <strong>Preconditions</strong>: <ul>
 * <li>The datasets described by the two {@link StatisticalSummary}
 * instances must each contain at least 2 observations.
388 * </li></ul>
389 *
390 * @param sampleStats1 StatisticalSummary describing data from the first sample
391 * @param sampleStats2 StatisticalSummary describing data from the second sample
392 * @return t statistic
393 * @throws NullArgumentException if the sample statistics are <code>null</code>
394 * @throws NumberIsTooSmallException if the number of samples is < 2
395 */
396 public double homoscedasticT(final StatisticalSummary sampleStats1,
397 final StatisticalSummary sampleStats2)
398 throws NullArgumentException, NumberIsTooSmallException {
399
400 checkSampleData(sampleStats1);
401 checkSampleData(sampleStats2);
402 return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
403 sampleStats1.getVariance(), sampleStats2.getVariance(),
404 sampleStats1.getN(), sampleStats2.getN());
405 }
406
407 /**
408 * Returns the <i>observed significance level</i>, or
409 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
410 * comparing the mean of the input array with the constant <code>mu</code>.
411 * <p>
412 * The number returned is the smallest significance level
413 * at which one can reject the null hypothesis that the mean equals
414 * <code>mu</code> in favor of the two-sided alternative that the mean
415 * is different from <code>mu</code>. For a one-sided test, divide the
416 * returned value by 2.</p>
417 * <p>
418 * <strong>Usage Note:</strong><br>
419 * The validity of the test depends on the assumptions of the parametric
420 * t-test procedure, as discussed
421 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
422 * </p><p>
423 * <strong>Preconditions</strong>: <ul>
424 * <li>The observed array length must be at least 2.
425 * </li></ul>
426 *
427 * @param mu constant value to compare sample mean against
428 * @param sample array of sample data values
429 * @return p-value
430 * @throws NullArgumentException if the sample array is <code>null</code>
431 * @throws NumberIsTooSmallException if the length of the array is < 2
432 * @throws MaxCountExceededException if an error occurs computing the p-value
433 */
434 public double tTest(final double mu, final double[] sample)
435 throws NullArgumentException, NumberIsTooSmallException,
436 MaxCountExceededException {
437
438 checkSampleData(sample);
439 // No try-catch or advertised exception because args have just been checked
440 return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
441 sample.length);
442 }
443
444 /**
445 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
446 * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
447 * which <code>sample</code> is drawn equals <code>mu</code>.
448 * <p>
449 * Returns <code>true</code> iff the null hypothesis can be
450 * rejected with confidence <code>1 - alpha</code>. To
451 * perform a 1-sided test, use <code>alpha * 2</code></p>
452 * <p>
453 * <strong>Examples:</strong><br><ol>
454 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
455 * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
456 * </li>
457 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
458 * at the 99% level, first verify that the measured sample mean is less
459 * than <code>mu</code> and then use
460 * <br><code>tTest(mu, sample, 0.02) </code>
461 * </li></ol>
462 * <p>
463 * <strong>Usage Note:</strong><br>
464 * The validity of the test depends on the assumptions of the one-sample
465 * parametric t-test procedure, as discussed
466 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
467 * </p><p>
468 * <strong>Preconditions</strong>: <ul>
469 * <li>The observed array length must be at least 2.
470 * </li></ul>
471 *
472 * @param mu constant value to compare sample mean against
473 * @param sample array of sample data values
474 * @param alpha significance level of the test
 * @return true if the null hypothesis can be rejected with confidence 1 - alpha
476 * @throws NullArgumentException if the sample array is <code>null</code>
477 * @throws NumberIsTooSmallException if the length of the array is < 2
478 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
 * @throws MaxCountExceededException if an error occurs computing the p-value
480 */
481 public boolean tTest(final double mu, final double[] sample, final double alpha)
482 throws NullArgumentException, NumberIsTooSmallException,
483 OutOfRangeException, MaxCountExceededException {
484
485 checkSignificanceLevel(alpha);
486 return tTest(mu, sample) < alpha;
487 }
488
489 /**
490 * Returns the <i>observed significance level</i>, or
491 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
492 * comparing the mean of the dataset described by <code>sampleStats</code>
493 * with the constant <code>mu</code>.
494 * <p>
495 * The number returned is the smallest significance level
496 * at which one can reject the null hypothesis that the mean equals
497 * <code>mu</code> in favor of the two-sided alternative that the mean
498 * is different from <code>mu</code>. For a one-sided test, divide the
499 * returned value by 2.</p>
500 * <p>
501 * <strong>Usage Note:</strong><br>
502 * The validity of the test depends on the assumptions of the parametric
503 * t-test procedure, as discussed
504 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
505 * here</a></p>
506 * <p>
507 * <strong>Preconditions</strong>: <ul>
508 * <li>The sample must contain at least 2 observations.
509 * </li></ul>
510 *
511 * @param mu constant value to compare sample mean against
512 * @param sampleStats StatisticalSummary describing sample data
513 * @return p-value
514 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
515 * @throws NumberIsTooSmallException if the number of samples is < 2
516 * @throws MaxCountExceededException if an error occurs computing the p-value
517 */
518 public double tTest(final double mu, final StatisticalSummary sampleStats)
519 throws NullArgumentException, NumberIsTooSmallException,
520 MaxCountExceededException {
521
522 checkSampleData(sampleStats);
523 return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
524 sampleStats.getN());
525 }
526
527 /**
528 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
529 * two-sided t-test</a> evaluating the null hypothesis that the mean of the
530 * population from which the dataset described by <code>stats</code> is
531 * drawn equals <code>mu</code>.
532 * <p>
533 * Returns <code>true</code> iff the null hypothesis can be rejected with
534 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
535 * <code>alpha * 2.</code></p>
536 * <p>
537 * <strong>Examples:</strong><br><ol>
538 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
539 * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
540 * </li>
541 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
542 * at the 99% level, first verify that the measured sample mean is less
543 * than <code>mu</code> and then use
544 * <br><code>tTest(mu, sampleStats, 0.02) </code>
545 * </li></ol>
546 * <p>
547 * <strong>Usage Note:</strong><br>
548 * The validity of the test depends on the assumptions of the one-sample
549 * parametric t-test procedure, as discussed
550 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
551 * </p><p>
552 * <strong>Preconditions</strong>: <ul>
553 * <li>The sample must include at least 2 observations.
554 * </li></ul>
555 *
556 * @param mu constant value to compare sample mean against
557 * @param sampleStats StatisticalSummary describing sample data values
558 * @param alpha significance level of the test
 * @return true if the null hypothesis can be rejected with confidence 1 - alpha
560 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
561 * @throws NumberIsTooSmallException if the number of samples is < 2
562 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
563 * @throws MaxCountExceededException if an error occurs computing the p-value
564 */
565 public boolean tTest(final double mu, final StatisticalSummary sampleStats,
566 final double alpha)
567 throws NullArgumentException, NumberIsTooSmallException,
568 OutOfRangeException, MaxCountExceededException {
569
570 checkSignificanceLevel(alpha);
571 return tTest(mu, sampleStats) < alpha;
572 }
573
574 /**
575 * Returns the <i>observed significance level</i>, or
576 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
577 * comparing the means of the input arrays.
578 * <p>
579 * The number returned is the smallest significance level
580 * at which one can reject the null hypothesis that the two means are
581 * equal in favor of the two-sided alternative that they are different.
582 * For a one-sided test, divide the returned value by 2.</p>
583 * <p>
 * The test does not assume that the underlying population variances are
585 * equal and it uses approximated degrees of freedom computed from the
586 * sample data to compute the p-value. The t-statistic used is as defined in
587 * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
588 * to the degrees of freedom is used,
589 * as described
590 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
591 * here.</a> To perform the test under the assumption of equal subpopulation
592 * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
593 * <p>
594 * <strong>Usage Note:</strong><br>
595 * The validity of the p-value depends on the assumptions of the parametric
596 * t-test procedure, as discussed
597 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
598 * here</a></p>
599 * <p>
600 * <strong>Preconditions</strong>: <ul>
601 * <li>The observed array lengths must both be at least 2.
602 * </li></ul>
603 *
604 * @param sample1 array of sample data values
605 * @param sample2 array of sample data values
606 * @return p-value for t-test
607 * @throws NullArgumentException if the arrays are <code>null</code>
608 * @throws NumberIsTooSmallException if the length of the arrays is < 2
609 * @throws MaxCountExceededException if an error occurs computing the p-value
610 */
611 public double tTest(final double[] sample1, final double[] sample2)
612 throws NullArgumentException, NumberIsTooSmallException,
613 MaxCountExceededException {
614
615 checkSampleData(sample1);
616 checkSampleData(sample2);
617 // No try-catch or advertised exception because args have just been checked
618 return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
619 StatUtils.variance(sample1), StatUtils.variance(sample2),
620 sample1.length, sample2.length);
621 }
622
623 /**
624 * Returns the <i>observed significance level</i>, or
625 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
626 * comparing the means of the input arrays, under the assumption that
627 * the two samples are drawn from subpopulations with equal variances.
628 * To perform the test without the equal variances assumption, use
629 * {@link #tTest(double[], double[])}.
630 * <p>
631 * The number returned is the smallest significance level
632 * at which one can reject the null hypothesis that the two means are
633 * equal in favor of the two-sided alternative that they are different.
634 * For a one-sided test, divide the returned value by 2.</p>
635 * <p>
636 * A pooled variance estimate is used to compute the t-statistic. See
637 * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
638 * minus 2 is used as the degrees of freedom.</p>
639 * <p>
640 * <strong>Usage Note:</strong><br>
641 * The validity of the p-value depends on the assumptions of the parametric
642 * t-test procedure, as discussed
643 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
644 * here</a></p>
645 * <p>
646 * <strong>Preconditions</strong>: <ul>
647 * <li>The observed array lengths must both be at least 2.
648 * </li></ul>
649 *
650 * @param sample1 array of sample data values
651 * @param sample2 array of sample data values
652 * @return p-value for t-test
653 * @throws NullArgumentException if the arrays are <code>null</code>
654 * @throws NumberIsTooSmallException if the length of the arrays is < 2
655 * @throws MaxCountExceededException if an error occurs computing the p-value
656 */
657 public double homoscedasticTTest(final double[] sample1, final double[] sample2)
658 throws NullArgumentException, NumberIsTooSmallException,
659 MaxCountExceededException {
660
661 checkSampleData(sample1);
662 checkSampleData(sample2);
663 // No try-catch or advertised exception because args have just been checked
664 return homoscedasticTTest(StatUtils.mean(sample1),
665 StatUtils.mean(sample2),
666 StatUtils.variance(sample1),
667 StatUtils.variance(sample2),
668 sample1.length, sample2.length);
669 }
670
671 /**
672 * Performs a
673 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
674 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
675 * and <code>sample2</code> are drawn from populations with the same mean,
676 * with significance level <code>alpha</code>. This test does not assume
677 * that the subpopulation variances are equal. To perform the test assuming
678 * equal variances, use
679 * {@link #homoscedasticTTest(double[], double[], double)}.
680 * <p>
681 * Returns <code>true</code> iff the null hypothesis that the means are
682 * equal can be rejected with confidence <code>1 - alpha</code>. To
683 * perform a 1-sided test, use <code>alpha * 2</code></p>
684 * <p>
685 * See {@link #t(double[], double[])} for the formula used to compute the
686 * t-statistic. Degrees of freedom are approximated using the
687 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
688 * Welch-Satterthwaite approximation.</a></p>
689 * <p>
690 * <strong>Examples:</strong><br><ol>
691 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
692 * the 95% level, use
693 * <br><code>tTest(sample1, sample2, 0.05). </code>
694 * </li>
695 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>,
696 * at the 99% level, first verify that the measured mean of <code>sample 1</code>
697 * is less than the mean of <code>sample 2</code> and then use
698 * <br><code>tTest(sample1, sample2, 0.02) </code>
699 * </li></ol>
700 * <p>
701 * <strong>Usage Note:</strong><br>
702 * The validity of the test depends on the assumptions of the parametric
703 * t-test procedure, as discussed
704 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
705 * here</a></p>
706 * <p>
707 * <strong>Preconditions</strong>: <ul>
708 * <li>The observed array lengths must both be at least 2.
709 * </li>
710 * <li> <code> 0 < alpha < 0.5 </code>
711 * </li></ul>
712 *
713 * @param sample1 array of sample data values
714 * @param sample2 array of sample data values
715 * @param alpha significance level of the test
716 * @return true if the null hypothesis can be rejected with
717 * confidence 1 - alpha
718 * @throws NullArgumentException if the arrays are <code>null</code>
719 * @throws NumberIsTooSmallException if the length of the arrays is < 2
720 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
721 * @throws MaxCountExceededException if an error occurs computing the p-value
722 */
723 public boolean tTest(final double[] sample1, final double[] sample2,
724 final double alpha)
725 throws NullArgumentException, NumberIsTooSmallException,
726 OutOfRangeException, MaxCountExceededException {
727
728 checkSignificanceLevel(alpha);
729 return tTest(sample1, sample2) < alpha;
730 }
731
732 /**
733 * Performs a
734 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
735 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
736 * and <code>sample2</code> are drawn from populations with the same mean,
737 * with significance level <code>alpha</code>, assuming that the
738 * subpopulation variances are equal. Use
739 * {@link #tTest(double[], double[], double)} to perform the test without
740 * the assumption of equal variances.
741 * <p>
742 * Returns <code>true</code> iff the null hypothesis that the means are
743 * equal can be rejected with confidence <code>1 - alpha</code>. To
744 * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
745 * without the assumption of equal subpopulation variances, use
746 * {@link #tTest(double[], double[], double)}.</p>
747 * <p>
748 * A pooled variance estimate is used to compute the t-statistic. See
749 * {@link #t(double[], double[])} for the formula. The sum of the sample
750 * sizes minus 2 is used as the degrees of freedom.</p>
751 * <p>
752 * <strong>Examples:</strong><br><ol>
753 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
754 * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
755 * </li>
756 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
757 * at the 99% level, first verify that the measured mean of
758 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
759 * and then use
760 * <br><code>tTest(sample1, sample2, 0.02) </code>
761 * </li></ol>
762 * <p>
763 * <strong>Usage Note:</strong><br>
764 * The validity of the test depends on the assumptions of the parametric
765 * t-test procedure, as discussed
766 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
767 * here</a></p>
768 * <p>
769 * <strong>Preconditions</strong>: <ul>
770 * <li>The observed array lengths must both be at least 2.
771 * </li>
772 * <li> <code> 0 < alpha < 0.5 </code>
773 * </li></ul>
774 *
775 * @param sample1 array of sample data values
776 * @param sample2 array of sample data values
777 * @param alpha significance level of the test
778 * @return true if the null hypothesis can be rejected with
779 * confidence 1 - alpha
780 * @throws NullArgumentException if the arrays are <code>null</code>
781 * @throws NumberIsTooSmallException if the length of the arrays is < 2
782 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
783 * @throws MaxCountExceededException if an error occurs computing the p-value
784 */
785 public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
786 final double alpha)
787 throws NullArgumentException, NumberIsTooSmallException,
788 OutOfRangeException, MaxCountExceededException {
789
790 checkSignificanceLevel(alpha);
791 return homoscedasticTTest(sample1, sample2) < alpha;
792 }
793
794 /**
795 * Returns the <i>observed significance level</i>, or
796 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
797 * comparing the means of the datasets described by two StatisticalSummary
798 * instances.
799 * <p>
800 * The number returned is the smallest significance level
801 * at which one can reject the null hypothesis that the two means are
802 * equal in favor of the two-sided alternative that they are different.
803 * For a one-sided test, divide the returned value by 2.</p>
804 * <p>
805 * The test does not assume that the underlying population variances are
806 * equal and it uses approximated degrees of freedom computed from the
807 * sample data to compute the p-value. To perform the test assuming
808 * equal variances, use
809 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
810 * <p>
811 * <strong>Usage Note:</strong><br>
812 * The validity of the p-value depends on the assumptions of the parametric
813 * t-test procedure, as discussed
814 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
815 * here</a></p>
816 * <p>
817 * <strong>Preconditions</strong>: <ul>
818 * <li>The datasets described by the two Univariates must each contain
819 * at least 2 observations.
820 * </li></ul>
821 *
822 * @param sampleStats1 StatisticalSummary describing data from the first sample
823 * @param sampleStats2 StatisticalSummary describing data from the second sample
824 * @return p-value for t-test
825 * @throws NullArgumentException if the sample statistics are <code>null</code>
826 * @throws NumberIsTooSmallException if the number of samples is < 2
827 * @throws MaxCountExceededException if an error occurs computing the p-value
828 */
829 public double tTest(final StatisticalSummary sampleStats1,
830 final StatisticalSummary sampleStats2)
831 throws NullArgumentException, NumberIsTooSmallException,
832 MaxCountExceededException {
833
834 checkSampleData(sampleStats1);
835 checkSampleData(sampleStats2);
836 return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
837 sampleStats1.getVariance(), sampleStats2.getVariance(),
838 sampleStats1.getN(), sampleStats2.getN());
839 }
840
841 /**
842 * Returns the <i>observed significance level</i>, or
843 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
844 * comparing the means of the datasets described by two StatisticalSummary
845 * instances, under the hypothesis of equal subpopulation variances. To
846 * perform a test without the equal variances assumption, use
847 * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
848 * <p>
849 * The number returned is the smallest significance level
850 * at which one can reject the null hypothesis that the two means are
851 * equal in favor of the two-sided alternative that they are different.
852 * For a one-sided test, divide the returned value by 2.</p>
853 * <p>
854 * See {@link #homoscedasticT(double[], double[])} for the formula used to
855 * compute the t-statistic. The sum of the sample sizes minus 2 is used as
856 * the degrees of freedom.</p>
857 * <p>
858 * <strong>Usage Note:</strong><br>
859 * The validity of the p-value depends on the assumptions of the parametric
860 * t-test procedure, as discussed
861 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
862 * </p><p>
863 * <strong>Preconditions</strong>: <ul>
864 * <li>The datasets described by the two Univariates must each contain
865 * at least 2 observations.
866 * </li></ul>
867 *
868 * @param sampleStats1 StatisticalSummary describing data from the first sample
869 * @param sampleStats2 StatisticalSummary describing data from the second sample
870 * @return p-value for t-test
871 * @throws NullArgumentException if the sample statistics are <code>null</code>
872 * @throws NumberIsTooSmallException if the number of samples is < 2
873 * @throws MaxCountExceededException if an error occurs computing the p-value
874 */
875 public double homoscedasticTTest(final StatisticalSummary sampleStats1,
876 final StatisticalSummary sampleStats2)
877 throws NullArgumentException, NumberIsTooSmallException,
878 MaxCountExceededException {
879
880 checkSampleData(sampleStats1);
881 checkSampleData(sampleStats2);
882 return homoscedasticTTest(sampleStats1.getMean(),
883 sampleStats2.getMean(),
884 sampleStats1.getVariance(),
885 sampleStats2.getVariance(),
886 sampleStats1.getN(), sampleStats2.getN());
887 }
888
889 /**
890 * Performs a
891 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
892 * two-sided t-test</a> evaluating the null hypothesis that
893 * <code>sampleStats1</code> and <code>sampleStats2</code> describe
894 * datasets drawn from populations with the same mean, with significance
895 * level <code>alpha</code>. This test does not assume that the
896 * subpopulation variances are equal. To perform the test under the equal
897 * variances assumption, use
898 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
899 * <p>
900 * Returns <code>true</code> iff the null hypothesis that the means are
901 * equal can be rejected with confidence <code>1 - alpha</code>. To
902 * perform a 1-sided test, use <code>alpha * 2</code></p>
903 * <p>
904 * See {@link #t(double[], double[])} for the formula used to compute the
905 * t-statistic. Degrees of freedom are approximated using the
906 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
907 * Welch-Satterthwaite approximation.</a></p>
908 * <p>
909 * <strong>Examples:</strong><br><ol>
910 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
911 * the 95%, use
912 * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
913 * </li>
914 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
915 * at the 99% level, first verify that the measured mean of
916 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
917 * and then use
918 * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
919 * </li></ol>
920 * <p>
921 * <strong>Usage Note:</strong><br>
922 * The validity of the test depends on the assumptions of the parametric
923 * t-test procedure, as discussed
924 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
925 * here</a></p>
926 * <p>
927 * <strong>Preconditions</strong>: <ul>
928 * <li>The datasets described by the two Univariates must each contain
929 * at least 2 observations.
930 * </li>
931 * <li> <code> 0 < alpha < 0.5 </code>
932 * </li></ul>
933 *
934 * @param sampleStats1 StatisticalSummary describing sample data values
935 * @param sampleStats2 StatisticalSummary describing sample data values
936 * @param alpha significance level of the test
937 * @return true if the null hypothesis can be rejected with
938 * confidence 1 - alpha
939 * @throws NullArgumentException if the sample statistics are <code>null</code>
940 * @throws NumberIsTooSmallException if the number of samples is < 2
941 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
942 * @throws MaxCountExceededException if an error occurs computing the p-value
943 */
944 public boolean tTest(final StatisticalSummary sampleStats1,
945 final StatisticalSummary sampleStats2,
946 final double alpha)
947 throws NullArgumentException, NumberIsTooSmallException,
948 OutOfRangeException, MaxCountExceededException {
949
950 checkSignificanceLevel(alpha);
951 return tTest(sampleStats1, sampleStats2) < alpha;
952 }
953
954 //----------------------------------------------- Protected methods
955
956 /**
957 * Computes approximate degrees of freedom for 2-sample t-test.
958 *
959 * @param v1 first sample variance
960 * @param v2 second sample variance
961 * @param n1 first sample n
962 * @param n2 second sample n
963 * @return approximate degrees of freedom
964 */
965 protected double df(double v1, double v2, double n1, double n2) {
966 return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
967 ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
968 (n2 * n2 * (n2 - 1d)));
969 }
970
971 /**
972 * Computes t test statistic for 1-sample t-test.
973 *
974 * @param m sample mean
975 * @param mu constant to test against
976 * @param v sample variance
977 * @param n sample n
978 * @return t test statistic
979 */
980 protected double t(final double m, final double mu,
981 final double v, final double n) {
982 return (m - mu) / JdkMath.sqrt(v / n);
983 }
984
985 /**
986 * Computes t test statistic for 2-sample t-test.
987 * <p>
988 * Does not assume that subpopulation variances are equal.</p>
989 *
990 * @param m1 first sample mean
991 * @param m2 second sample mean
992 * @param v1 first sample variance
993 * @param v2 second sample variance
994 * @param n1 first sample n
995 * @param n2 second sample n
996 * @return t test statistic
997 */
998 protected double t(final double m1, final double m2,
999 final double v1, final double v2,
1000 final double n1, final double n2) {
1001 return (m1 - m2) / JdkMath.sqrt((v1 / n1) + (v2 / n2));
1002 }
1003
1004 /**
1005 * Computes t test statistic for 2-sample t-test under the hypothesis
1006 * of equal subpopulation variances.
1007 *
1008 * @param m1 first sample mean
1009 * @param m2 second sample mean
1010 * @param v1 first sample variance
1011 * @param v2 second sample variance
1012 * @param n1 first sample n
1013 * @param n2 second sample n
1014 * @return t test statistic
1015 */
1016 protected double homoscedasticT(final double m1, final double m2,
1017 final double v1, final double v2,
1018 final double n1, final double n2) {
1019 final double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1020 return (m1 - m2) / JdkMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1021 }
1022
1023 /**
1024 * Computes p-value for 2-sided, 1-sample t-test.
1025 *
1026 * @param m sample mean
1027 * @param mu constant to test against
1028 * @param v sample variance
1029 * @param n sample n
1030 * @return p-value
1031 * @throws MaxCountExceededException if an error occurs computing the p-value
1032 * @throws MathIllegalArgumentException if n is not greater than 1
1033 */
1034 protected double tTest(final double m, final double mu,
1035 final double v, final double n)
1036 throws MaxCountExceededException, MathIllegalArgumentException {
1037
1038 final double t = JdkMath.abs(t(m, mu, v, n));
1039 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1040 final TDistribution distribution = TDistribution.of(n - 1);
1041 return 2.0 * distribution.cumulativeProbability(-t);
1042 }
1043
1044 /**
1045 * Computes p-value for 2-sided, 2-sample t-test.
1046 * <p>
1047 * Does not assume subpopulation variances are equal. Degrees of freedom
1048 * are estimated from the data.</p>
1049 *
1050 * @param m1 first sample mean
1051 * @param m2 second sample mean
1052 * @param v1 first sample variance
1053 * @param v2 second sample variance
1054 * @param n1 first sample n
1055 * @param n2 second sample n
1056 * @return p-value
1057 * @throws MaxCountExceededException if an error occurs computing the p-value
1058 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1059 * strictly positive
1060 */
1061 protected double tTest(final double m1, final double m2,
1062 final double v1, final double v2,
1063 final double n1, final double n2)
1064 throws MaxCountExceededException, NotStrictlyPositiveException {
1065
1066 final double t = JdkMath.abs(t(m1, m2, v1, v2, n1, n2));
1067 final double degreesOfFreedom = df(v1, v2, n1, n2);
1068 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1069 final TDistribution distribution = TDistribution.of(degreesOfFreedom);
1070 return 2.0 * distribution.cumulativeProbability(-t);
1071 }
1072
1073 /**
1074 * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1075 * of equal subpopulation variances.
1076 * <p>
1077 * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1078 *
1079 * @param m1 first sample mean
1080 * @param m2 second sample mean
1081 * @param v1 first sample variance
1082 * @param v2 second sample variance
1083 * @param n1 first sample n
1084 * @param n2 second sample n
1085 * @return p-value
1086 * @throws MaxCountExceededException if an error occurs computing the p-value
1087 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1088 * strictly positive
1089 */
1090 protected double homoscedasticTTest(double m1, double m2,
1091 double v1, double v2,
1092 double n1, double n2)
1093 throws MaxCountExceededException, NotStrictlyPositiveException {
1094
1095 final double t = JdkMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1096 final double degreesOfFreedom = n1 + n2 - 2;
1097 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
1098 final TDistribution distribution = TDistribution.of(degreesOfFreedom);
1099 return 2.0 * distribution.cumulativeProbability(-t);
1100 }
1101
1102 /**
1103 * Check significance level.
1104 *
1105 * @param alpha significance level
1106 * @throws OutOfRangeException if the significance level is out of bounds.
1107 */
1108 private void checkSignificanceLevel(final double alpha)
1109 throws OutOfRangeException {
1110
1111 if (alpha <= 0 || alpha > 0.5) {
1112 throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
1113 alpha, 0.0, 0.5);
1114 }
1115 }
1116
1117 /**
1118 * Check sample data.
1119 *
1120 * @param data Sample data.
1121 * @throws NullArgumentException if {@code data} is {@code null}.
1122 * @throws NumberIsTooSmallException if there is not enough sample data.
1123 */
1124 private void checkSampleData(final double[] data)
1125 throws NullArgumentException, NumberIsTooSmallException {
1126
1127 if (data == null) {
1128 throw new NullArgumentException();
1129 }
1130 if (data.length < 2) {
1131 throw new NumberIsTooSmallException(
1132 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1133 data.length, 2, true);
1134 }
1135 }
1136
1137 /**
1138 * Check sample data.
1139 *
1140 * @param stat Statistical summary.
1141 * @throws NullArgumentException if {@code data} is {@code null}.
1142 * @throws NumberIsTooSmallException if there is not enough sample data.
1143 */
1144 private void checkSampleData(final StatisticalSummary stat)
1145 throws NullArgumentException, NumberIsTooSmallException {
1146
1147 if (stat == null) {
1148 throw new NullArgumentException();
1149 }
1150 if (stat.getN() < 2) {
1151 throw new NumberIsTooSmallException(
1152 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1153 stat.getN(), 2, true);
1154 }
1155 }
1156 }