1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math4.legacy.stat.inference; 18 19 import org.apache.commons.statistics.distribution.TDistribution; 20 import org.apache.commons.math4.legacy.exception.DimensionMismatchException; 21 import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException; 22 import org.apache.commons.math4.legacy.exception.MaxCountExceededException; 23 import org.apache.commons.math4.legacy.exception.NoDataException; 24 import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException; 25 import org.apache.commons.math4.legacy.exception.NullArgumentException; 26 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException; 27 import org.apache.commons.math4.legacy.exception.OutOfRangeException; 28 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 29 import org.apache.commons.math4.legacy.stat.StatUtils; 30 import org.apache.commons.math4.legacy.stat.descriptive.StatisticalSummary; 31 import org.apache.commons.math4.core.jdkmath.JdkMath; 32 33 /** 34 * An implementation for Student's t-tests. 
35 * <p> 36 * Tests can be:<ul> 37 * <li>One-sample or two-sample</li> 38 * <li>One-sided or two-sided</li> 39 * <li>Paired or unpaired (for two-sample tests)</li> 40 * <li>Homoscedastic (equal variance assumption) or heteroscedastic 41 * (for two sample tests)</li> 42 * <li>Fixed significance level (boolean-valued) or returning p-values. 43 * </li></ul> 44 * <p> 45 * Test statistics are available for all tests. Methods including "Test" 46 * in their names perform tests, all other methods return t-statistics. Among 47 * the "Test" methods, <code>double-</code>valued methods return p-values; 48 * <code>boolean-</code>valued methods perform fixed significance level tests. 49 * Significance levels are always specified as numbers between 0 and 0.5 50 * (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p> 51 * <p> 52 * Input to tests can be either <code>double[]</code> arrays or 53 * {@link StatisticalSummary} instances.</p><p> 54 * Uses commons-statistics {@link org.apache.commons.statistics.distribution.TDistribution} 55 * implementation to estimate exact p-values.</p> 56 * 57 */ 58 public class TTest { 59 /** 60 * Computes a paired, 2-sample t-statistic based on the data in the input 61 * arrays. The t-statistic returned is equivalent to what would be returned by 62 * computing the one-sample t-statistic {@link #t(double, double[])}, with 63 * <code>mu = 0</code> and the sample array consisting of the (signed) 64 * differences between corresponding entries in <code>sample1</code> and 65 * <code>sample2.</code> 66 * <p> 67 * <strong>Preconditions</strong>: <ul> 68 * <li>The input arrays must have the same length and their common length 69 * must be at least 2. 
70 * </li></ul> 71 * 72 * @param sample1 array of sample data values 73 * @param sample2 array of sample data values 74 * @return t statistic 75 * @throws NullArgumentException if the arrays are <code>null</code> 76 * @throws NoDataException if the arrays are empty 77 * @throws DimensionMismatchException if the length of the arrays is not equal 78 * @throws NumberIsTooSmallException if the length of the arrays is < 2 79 */ 80 public double pairedT(final double[] sample1, final double[] sample2) 81 throws NullArgumentException, NoDataException, 82 DimensionMismatchException, NumberIsTooSmallException { 83 84 checkSampleData(sample1); 85 checkSampleData(sample2); 86 double meanDifference = StatUtils.meanDifference(sample1, sample2); 87 return t(meanDifference, 0, 88 StatUtils.varianceDifference(sample1, sample2, meanDifference), 89 sample1.length); 90 } 91 92 /** 93 * Returns the <i>observed significance level</i>, or 94 * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test 95 * based on the data in the input arrays. 96 * <p> 97 * The number returned is the smallest significance level 98 * at which one can reject the null hypothesis that the mean of the paired 99 * differences is 0 in favor of the two-sided alternative that the mean paired 100 * difference is not equal to 0. 
For a one-sided test, divide the returned 101 * value by 2.</p> 102 * <p> 103 * This test is equivalent to a one-sample t-test computed using 104 * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample 105 * array consisting of the signed differences between corresponding elements of 106 * <code>sample1</code> and <code>sample2.</code></p> 107 * <p> 108 * <strong>Usage Note:</strong><br> 109 * The validity of the p-value depends on the assumptions of the parametric 110 * t-test procedure, as discussed 111 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 112 * here</a></p> 113 * <p> 114 * <strong>Preconditions</strong>: <ul> 115 * <li>The input array lengths must be the same and their common length must 116 * be at least 2. 117 * </li></ul> 118 * 119 * @param sample1 array of sample data values 120 * @param sample2 array of sample data values 121 * @return p-value for t-test 122 * @throws NullArgumentException if the arrays are <code>null</code> 123 * @throws NoDataException if the arrays are empty 124 * @throws DimensionMismatchException if the length of the arrays is not equal 125 * @throws NumberIsTooSmallException if the length of the arrays is < 2 126 * @throws MaxCountExceededException if an error occurs computing the p-value 127 */ 128 public double pairedTTest(final double[] sample1, final double[] sample2) 129 throws NullArgumentException, NoDataException, DimensionMismatchException, 130 NumberIsTooSmallException, MaxCountExceededException { 131 132 double meanDifference = StatUtils.meanDifference(sample1, sample2); 133 return tTest(meanDifference, 0, 134 StatUtils.varianceDifference(sample1, sample2, meanDifference), 135 sample1.length); 136 } 137 138 /** 139 * Performs a paired t-test evaluating the null hypothesis that the 140 * mean of the paired differences between <code>sample1</code> and 141 * <code>sample2</code> is 0 in favor of the two-sided alternative that the 142 * mean paired difference is 
not equal to 0, with significance level 143 * <code>alpha</code>. 144 * <p> 145 * Returns <code>true</code> iff the null hypothesis can be rejected with 146 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use 147 * <code>alpha * 2</code></p> 148 * <p> 149 * <strong>Usage Note:</strong><br> 150 * The validity of the test depends on the assumptions of the parametric 151 * t-test procedure, as discussed 152 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 153 * here</a></p> 154 * <p> 155 * <strong>Preconditions</strong>: <ul> 156 * <li>The input array lengths must be the same and their common length 157 * must be at least 2. 158 * </li> 159 * <li> <code> 0 < alpha < 0.5 </code> 160 * </li></ul> 161 * 162 * @param sample1 array of sample data values 163 * @param sample2 array of sample data values 164 * @param alpha significance level of the test 165 * @return true if the null hypothesis can be rejected with 166 * confidence 1 - alpha 167 * @throws NullArgumentException if the arrays are <code>null</code> 168 * @throws NoDataException if the arrays are empty 169 * @throws DimensionMismatchException if the length of the arrays is not equal 170 * @throws NumberIsTooSmallException if the length of the arrays is < 2 171 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 172 * @throws MaxCountExceededException if an error occurs computing the p-value 173 */ 174 public boolean pairedTTest(final double[] sample1, final double[] sample2, 175 final double alpha) 176 throws NullArgumentException, NoDataException, DimensionMismatchException, 177 NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException { 178 179 checkSignificanceLevel(alpha); 180 return pairedTTest(sample1, sample2) < alpha; 181 } 182 183 /** 184 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> 185 * t statistic </a> given observed values and a comparison constant. 
186 * <p> 187 * This statistic can be used to perform a one sample t-test for the mean. 188 * </p><p> 189 * <strong>Preconditions</strong>: <ul> 190 * <li>The observed array length must be at least 2. 191 * </li></ul> 192 * 193 * @param mu comparison constant 194 * @param observed array of values 195 * @return t statistic 196 * @throws NullArgumentException if <code>observed</code> is <code>null</code> 197 * @throws NumberIsTooSmallException if the length of <code>observed</code> is < 2 198 */ 199 public double t(final double mu, final double[] observed) 200 throws NullArgumentException, NumberIsTooSmallException { 201 202 checkSampleData(observed); 203 // No try-catch or advertised exception because args have just been checked 204 return t(StatUtils.mean(observed), mu, StatUtils.variance(observed), 205 observed.length); 206 } 207 208 /** 209 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> 210 * t statistic </a> to use in comparing the mean of the dataset described by 211 * <code>sampleStats</code> to <code>mu</code>. 212 * <p> 213 * This statistic can be used to perform a one sample t-test for the mean. 214 * </p><p> 215 * <strong>Preconditions</strong>: <ul> 216 * <li><code>observed.getN() ≥ 2</code>. 217 * </li></ul> 218 * 219 * @param mu comparison constant 220 * @param sampleStats DescriptiveStatistics holding sample summary statitstics 221 * @return t statistic 222 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 223 * @throws NumberIsTooSmallException if the number of samples is < 2 224 */ 225 public double t(final double mu, final StatisticalSummary sampleStats) 226 throws NullArgumentException, NumberIsTooSmallException { 227 228 checkSampleData(sampleStats); 229 return t(sampleStats.getMean(), mu, sampleStats.getVariance(), 230 sampleStats.getN()); 231 } 232 233 /** 234 * Computes a 2-sample t statistic, under the hypothesis of equal 235 * subpopulation variances. 
To compute a t-statistic without the 236 * equal variances hypothesis, use {@link #t(double[], double[])}. 237 * <p> 238 * This statistic can be used to perform a (homoscedastic) two-sample 239 * t-test to compare sample means.</p> 240 * <p> 241 * The t-statistic is</p> 242 * <p> 243 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code> 244 * </p><p> 245 * where <strong><code>n1</code></strong> is the size of first sample; 246 * <strong><code> n2</code></strong> is the size of second sample; 247 * <strong><code> m1</code></strong> is the mean of first sample; 248 * <strong><code> m2</code></strong> is the mean of second sample 249 * and <strong><code>var</code></strong> is the pooled variance estimate: 250 * </p><p> 251 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code> 252 * </p><p> 253 * with <strong><code>var1</code></strong> the variance of the first sample and 254 * <strong><code>var2</code></strong> the variance of the second sample. 255 * </p><p> 256 * <strong>Preconditions</strong>: <ul> 257 * <li>The observed array lengths must both be at least 2. 258 * </li></ul> 259 * 260 * @param sample1 array of sample data values 261 * @param sample2 array of sample data values 262 * @return t statistic 263 * @throws NullArgumentException if the arrays are <code>null</code> 264 * @throws NumberIsTooSmallException if the length of the arrays is < 2 265 */ 266 public double homoscedasticT(final double[] sample1, final double[] sample2) 267 throws NullArgumentException, NumberIsTooSmallException { 268 269 checkSampleData(sample1); 270 checkSampleData(sample2); 271 // No try-catch or advertised exception because args have just been checked 272 return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2), 273 StatUtils.variance(sample1), StatUtils.variance(sample2), 274 sample1.length, sample2.length); 275 } 276 277 /** 278 * Computes a 2-sample t statistic, without the hypothesis of equal 279 * subpopulation variances. 
To compute a t-statistic assuming equal 280 * variances, use {@link #homoscedasticT(double[], double[])}. 281 * <p> 282 * This statistic can be used to perform a two-sample t-test to compare 283 * sample means.</p> 284 * <p> 285 * The t-statistic is</p> 286 * <p> 287 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> 288 * </p><p> 289 * where <strong><code>n1</code></strong> is the size of the first sample 290 * <strong><code> n2</code></strong> is the size of the second sample; 291 * <strong><code> m1</code></strong> is the mean of the first sample; 292 * <strong><code> m2</code></strong> is the mean of the second sample; 293 * <strong><code> var1</code></strong> is the variance of the first sample; 294 * <strong><code> var2</code></strong> is the variance of the second sample; 295 * </p><p> 296 * <strong>Preconditions</strong>: <ul> 297 * <li>The observed array lengths must both be at least 2. 298 * </li></ul> 299 * 300 * @param sample1 array of sample data values 301 * @param sample2 array of sample data values 302 * @return t statistic 303 * @throws NullArgumentException if the arrays are <code>null</code> 304 * @throws NumberIsTooSmallException if the length of the arrays is < 2 305 */ 306 public double t(final double[] sample1, final double[] sample2) 307 throws NullArgumentException, NumberIsTooSmallException { 308 309 checkSampleData(sample1); 310 checkSampleData(sample2); 311 // No try-catch or advertised exception because args have just been checked 312 return t(StatUtils.mean(sample1), StatUtils.mean(sample2), 313 StatUtils.variance(sample1), StatUtils.variance(sample2), 314 sample1.length, sample2.length); 315 } 316 317 /** 318 * Computes a 2-sample t statistic, comparing the means of the datasets 319 * described by two {@link StatisticalSummary} instances, without the 320 * assumption of equal subpopulation variances. 
Use 321 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to 322 * compute a t-statistic under the equal variances assumption. 323 * <p> 324 * This statistic can be used to perform a two-sample t-test to compare 325 * sample means.</p> 326 * <p> 327 * The returned t-statistic is</p> 328 * <p> 329 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> 330 * </p><p> 331 * where <strong><code>n1</code></strong> is the size of the first sample; 332 * <strong><code> n2</code></strong> is the size of the second sample; 333 * <strong><code> m1</code></strong> is the mean of the first sample; 334 * <strong><code> m2</code></strong> is the mean of the second sample 335 * <strong><code> var1</code></strong> is the variance of the first sample; 336 * <strong><code> var2</code></strong> is the variance of the second sample 337 * </p><p> 338 * <strong>Preconditions</strong>: <ul> 339 * <li>The datasets described by the two Univariates must each contain 340 * at least 2 observations. 
341 * </li></ul> 342 * 343 * @param sampleStats1 StatisticalSummary describing data from the first sample 344 * @param sampleStats2 StatisticalSummary describing data from the second sample 345 * @return t statistic 346 * @throws NullArgumentException if the sample statistics are <code>null</code> 347 * @throws NumberIsTooSmallException if the number of samples is < 2 348 */ 349 public double t(final StatisticalSummary sampleStats1, 350 final StatisticalSummary sampleStats2) 351 throws NullArgumentException, NumberIsTooSmallException { 352 353 checkSampleData(sampleStats1); 354 checkSampleData(sampleStats2); 355 return t(sampleStats1.getMean(), sampleStats2.getMean(), 356 sampleStats1.getVariance(), sampleStats2.getVariance(), 357 sampleStats1.getN(), sampleStats2.getN()); 358 } 359 360 /** 361 * Computes a 2-sample t statistic, comparing the means of the datasets 362 * described by two {@link StatisticalSummary} instances, under the 363 * assumption of equal subpopulation variances. To compute a t-statistic 364 * without the equal variances assumption, use 365 * {@link #t(StatisticalSummary, StatisticalSummary)}. 
366 * <p> 367 * This statistic can be used to perform a (homoscedastic) two-sample 368 * t-test to compare sample means.</p> 369 * <p> 370 * The t-statistic returned is</p> 371 * <p> 372 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code> 373 * </p><p> 374 * where <strong><code>n1</code></strong> is the size of first sample; 375 * <strong><code> n2</code></strong> is the size of second sample; 376 * <strong><code> m1</code></strong> is the mean of first sample; 377 * <strong><code> m2</code></strong> is the mean of second sample 378 * and <strong><code>var</code></strong> is the pooled variance estimate: 379 * </p><p> 380 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code> 381 * </p><p> 382 * with <strong><code>var1</code></strong> the variance of the first sample and 383 * <strong><code>var2</code></strong> the variance of the second sample. 384 * </p><p> 385 * <strong>Preconditions</strong>: <ul> 386 * <li>The datasets described by the two Univariates must each contain 387 * at least 2 observations. 
388 * </li></ul> 389 * 390 * @param sampleStats1 StatisticalSummary describing data from the first sample 391 * @param sampleStats2 StatisticalSummary describing data from the second sample 392 * @return t statistic 393 * @throws NullArgumentException if the sample statistics are <code>null</code> 394 * @throws NumberIsTooSmallException if the number of samples is < 2 395 */ 396 public double homoscedasticT(final StatisticalSummary sampleStats1, 397 final StatisticalSummary sampleStats2) 398 throws NullArgumentException, NumberIsTooSmallException { 399 400 checkSampleData(sampleStats1); 401 checkSampleData(sampleStats2); 402 return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(), 403 sampleStats1.getVariance(), sampleStats2.getVariance(), 404 sampleStats1.getN(), sampleStats2.getN()); 405 } 406 407 /** 408 * Returns the <i>observed significance level</i>, or 409 * <i>p-value</i>, associated with a one-sample, two-tailed t-test 410 * comparing the mean of the input array with the constant <code>mu</code>. 411 * <p> 412 * The number returned is the smallest significance level 413 * at which one can reject the null hypothesis that the mean equals 414 * <code>mu</code> in favor of the two-sided alternative that the mean 415 * is different from <code>mu</code>. For a one-sided test, divide the 416 * returned value by 2.</p> 417 * <p> 418 * <strong>Usage Note:</strong><br> 419 * The validity of the test depends on the assumptions of the parametric 420 * t-test procedure, as discussed 421 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> 422 * </p><p> 423 * <strong>Preconditions</strong>: <ul> 424 * <li>The observed array length must be at least 2. 
425 * </li></ul> 426 * 427 * @param mu constant value to compare sample mean against 428 * @param sample array of sample data values 429 * @return p-value 430 * @throws NullArgumentException if the sample array is <code>null</code> 431 * @throws NumberIsTooSmallException if the length of the array is < 2 432 * @throws MaxCountExceededException if an error occurs computing the p-value 433 */ 434 public double tTest(final double mu, final double[] sample) 435 throws NullArgumentException, NumberIsTooSmallException, 436 MaxCountExceededException { 437 438 checkSampleData(sample); 439 // No try-catch or advertised exception because args have just been checked 440 return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample), 441 sample.length); 442 } 443 444 /** 445 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 446 * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from 447 * which <code>sample</code> is drawn equals <code>mu</code>. 448 * <p> 449 * Returns <code>true</code> iff the null hypothesis can be 450 * rejected with confidence <code>1 - alpha</code>. 
To 451 * perform a 1-sided test, use <code>alpha * 2</code></p> 452 * <p> 453 * <strong>Examples:</strong><br><ol> 454 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at 455 * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code> 456 * </li> 457 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> 458 * at the 99% level, first verify that the measured sample mean is less 459 * than <code>mu</code> and then use 460 * <br><code>tTest(mu, sample, 0.02) </code> 461 * </li></ol> 462 * <p> 463 * <strong>Usage Note:</strong><br> 464 * The validity of the test depends on the assumptions of the one-sample 465 * parametric t-test procedure, as discussed 466 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> 467 * </p><p> 468 * <strong>Preconditions</strong>: <ul> 469 * <li>The observed array length must be at least 2. 470 * </li></ul> 471 * 472 * @param mu constant value to compare sample mean against 473 * @param sample array of sample data values 474 * @param alpha significance level of the test 475 * @return p-value 476 * @throws NullArgumentException if the sample array is <code>null</code> 477 * @throws NumberIsTooSmallException if the length of the array is < 2 478 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 479 * @throws MaxCountExceededException if an error computing the p-value 480 */ 481 public boolean tTest(final double mu, final double[] sample, final double alpha) 482 throws NullArgumentException, NumberIsTooSmallException, 483 OutOfRangeException, MaxCountExceededException { 484 485 checkSignificanceLevel(alpha); 486 return tTest(mu, sample) < alpha; 487 } 488 489 /** 490 * Returns the <i>observed significance level</i>, or 491 * <i>p-value</i>, associated with a one-sample, two-tailed t-test 492 * comparing the mean of the dataset described by <code>sampleStats</code> 493 * with the constant <code>mu</code>. 
494 * <p> 495 * The number returned is the smallest significance level 496 * at which one can reject the null hypothesis that the mean equals 497 * <code>mu</code> in favor of the two-sided alternative that the mean 498 * is different from <code>mu</code>. For a one-sided test, divide the 499 * returned value by 2.</p> 500 * <p> 501 * <strong>Usage Note:</strong><br> 502 * The validity of the test depends on the assumptions of the parametric 503 * t-test procedure, as discussed 504 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 505 * here</a></p> 506 * <p> 507 * <strong>Preconditions</strong>: <ul> 508 * <li>The sample must contain at least 2 observations. 509 * </li></ul> 510 * 511 * @param mu constant value to compare sample mean against 512 * @param sampleStats StatisticalSummary describing sample data 513 * @return p-value 514 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 515 * @throws NumberIsTooSmallException if the number of samples is < 2 516 * @throws MaxCountExceededException if an error occurs computing the p-value 517 */ 518 public double tTest(final double mu, final StatisticalSummary sampleStats) 519 throws NullArgumentException, NumberIsTooSmallException, 520 MaxCountExceededException { 521 522 checkSampleData(sampleStats); 523 return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), 524 sampleStats.getN()); 525 } 526 527 /** 528 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 529 * two-sided t-test</a> evaluating the null hypothesis that the mean of the 530 * population from which the dataset described by <code>stats</code> is 531 * drawn equals <code>mu</code>. 532 * <p> 533 * Returns <code>true</code> iff the null hypothesis can be rejected with 534 * confidence <code>1 - alpha</code>. 
To perform a 1-sided test, use 535 * <code>alpha * 2.</code></p> 536 * <p> 537 * <strong>Examples:</strong><br><ol> 538 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at 539 * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code> 540 * </li> 541 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> 542 * at the 99% level, first verify that the measured sample mean is less 543 * than <code>mu</code> and then use 544 * <br><code>tTest(mu, sampleStats, 0.02) </code> 545 * </li></ol> 546 * <p> 547 * <strong>Usage Note:</strong><br> 548 * The validity of the test depends on the assumptions of the one-sample 549 * parametric t-test procedure, as discussed 550 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> 551 * </p><p> 552 * <strong>Preconditions</strong>: <ul> 553 * <li>The sample must include at least 2 observations. 554 * </li></ul> 555 * 556 * @param mu constant value to compare sample mean against 557 * @param sampleStats StatisticalSummary describing sample data values 558 * @param alpha significance level of the test 559 * @return p-value 560 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 561 * @throws NumberIsTooSmallException if the number of samples is < 2 562 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 563 * @throws MaxCountExceededException if an error occurs computing the p-value 564 */ 565 public boolean tTest(final double mu, final StatisticalSummary sampleStats, 566 final double alpha) 567 throws NullArgumentException, NumberIsTooSmallException, 568 OutOfRangeException, MaxCountExceededException { 569 570 checkSignificanceLevel(alpha); 571 return tTest(mu, sampleStats) < alpha; 572 } 573 574 /** 575 * Returns the <i>observed significance level</i>, or 576 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 577 * comparing the means of the input arrays. 
578 * <p> 579 * The number returned is the smallest significance level 580 * at which one can reject the null hypothesis that the two means are 581 * equal in favor of the two-sided alternative that they are different. 582 * For a one-sided test, divide the returned value by 2.</p> 583 * <p> 584 * The test does not assume that the underlying popuation variances are 585 * equal and it uses approximated degrees of freedom computed from the 586 * sample data to compute the p-value. The t-statistic used is as defined in 587 * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation 588 * to the degrees of freedom is used, 589 * as described 590 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> 591 * here.</a> To perform the test under the assumption of equal subpopulation 592 * variances, use {@link #homoscedasticTTest(double[], double[])}.</p> 593 * <p> 594 * <strong>Usage Note:</strong><br> 595 * The validity of the p-value depends on the assumptions of the parametric 596 * t-test procedure, as discussed 597 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 598 * here</a></p> 599 * <p> 600 * <strong>Preconditions</strong>: <ul> 601 * <li>The observed array lengths must both be at least 2. 
602 * </li></ul> 603 * 604 * @param sample1 array of sample data values 605 * @param sample2 array of sample data values 606 * @return p-value for t-test 607 * @throws NullArgumentException if the arrays are <code>null</code> 608 * @throws NumberIsTooSmallException if the length of the arrays is < 2 609 * @throws MaxCountExceededException if an error occurs computing the p-value 610 */ 611 public double tTest(final double[] sample1, final double[] sample2) 612 throws NullArgumentException, NumberIsTooSmallException, 613 MaxCountExceededException { 614 615 checkSampleData(sample1); 616 checkSampleData(sample2); 617 // No try-catch or advertised exception because args have just been checked 618 return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2), 619 StatUtils.variance(sample1), StatUtils.variance(sample2), 620 sample1.length, sample2.length); 621 } 622 623 /** 624 * Returns the <i>observed significance level</i>, or 625 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 626 * comparing the means of the input arrays, under the assumption that 627 * the two samples are drawn from subpopulations with equal variances. 628 * To perform the test without the equal variances assumption, use 629 * {@link #tTest(double[], double[])}. 630 * <p> 631 * The number returned is the smallest significance level 632 * at which one can reject the null hypothesis that the two means are 633 * equal in favor of the two-sided alternative that they are different. 634 * For a one-sided test, divide the returned value by 2.</p> 635 * <p> 636 * A pooled variance estimate is used to compute the t-statistic. See 637 * {@link #homoscedasticT(double[], double[])}. 
The sum of the sample sizes 638 * minus 2 is used as the degrees of freedom.</p> 639 * <p> 640 * <strong>Usage Note:</strong><br> 641 * The validity of the p-value depends on the assumptions of the parametric 642 * t-test procedure, as discussed 643 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 644 * here</a></p> 645 * <p> 646 * <strong>Preconditions</strong>: <ul> 647 * <li>The observed array lengths must both be at least 2. 648 * </li></ul> 649 * 650 * @param sample1 array of sample data values 651 * @param sample2 array of sample data values 652 * @return p-value for t-test 653 * @throws NullArgumentException if the arrays are <code>null</code> 654 * @throws NumberIsTooSmallException if the length of the arrays is < 2 655 * @throws MaxCountExceededException if an error occurs computing the p-value 656 */ 657 public double homoscedasticTTest(final double[] sample1, final double[] sample2) 658 throws NullArgumentException, NumberIsTooSmallException, 659 MaxCountExceededException { 660 661 checkSampleData(sample1); 662 checkSampleData(sample2); 663 // No try-catch or advertised exception because args have just been checked 664 return homoscedasticTTest(StatUtils.mean(sample1), 665 StatUtils.mean(sample2), 666 StatUtils.variance(sample1), 667 StatUtils.variance(sample2), 668 sample1.length, sample2.length); 669 } 670 671 /** 672 * Performs a 673 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 674 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 675 * and <code>sample2</code> are drawn from populations with the same mean, 676 * with significance level <code>alpha</code>. This test does not assume 677 * that the subpopulation variances are equal. To perform the test assuming 678 * equal variances, use 679 * {@link #homoscedasticTTest(double[], double[], double)}. 
680 * <p> 681 * Returns <code>true</code> iff the null hypothesis that the means are 682 * equal can be rejected with confidence <code>1 - alpha</code>. To 683 * perform a 1-sided test, use <code>alpha * 2</code></p> 684 * <p> 685 * See {@link #t(double[], double[])} for the formula used to compute the 686 * t-statistic. Degrees of freedom are approximated using the 687 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> 688 * Welch-Satterthwaite approximation.</a></p> 689 * <p> 690 * <strong>Examples:</strong><br><ol> 691 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 692 * the 95% level, use 693 * <br><code>tTest(sample1, sample2, 0.05). </code> 694 * </li> 695 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>, 696 * at the 99% level, first verify that the measured mean of <code>sample 1</code> 697 * is less than the mean of <code>sample 2</code> and then use 698 * <br><code>tTest(sample1, sample2, 0.02) </code> 699 * </li></ol> 700 * <p> 701 * <strong>Usage Note:</strong><br> 702 * The validity of the test depends on the assumptions of the parametric 703 * t-test procedure, as discussed 704 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 705 * here</a></p> 706 * <p> 707 * <strong>Preconditions</strong>: <ul> 708 * <li>The observed array lengths must both be at least 2. 
709 * </li> 710 * <li> <code> 0 < alpha < 0.5 </code> 711 * </li></ul> 712 * 713 * @param sample1 array of sample data values 714 * @param sample2 array of sample data values 715 * @param alpha significance level of the test 716 * @return true if the null hypothesis can be rejected with 717 * confidence 1 - alpha 718 * @throws NullArgumentException if the arrays are <code>null</code> 719 * @throws NumberIsTooSmallException if the length of the arrays is < 2 720 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 721 * @throws MaxCountExceededException if an error occurs computing the p-value 722 */ 723 public boolean tTest(final double[] sample1, final double[] sample2, 724 final double alpha) 725 throws NullArgumentException, NumberIsTooSmallException, 726 OutOfRangeException, MaxCountExceededException { 727 728 checkSignificanceLevel(alpha); 729 return tTest(sample1, sample2) < alpha; 730 } 731 732 /** 733 * Performs a 734 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 735 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 736 * and <code>sample2</code> are drawn from populations with the same mean, 737 * with significance level <code>alpha</code>, assuming that the 738 * subpopulation variances are equal. Use 739 * {@link #tTest(double[], double[], double)} to perform the test without 740 * the assumption of equal variances. 741 * <p> 742 * Returns <code>true</code> iff the null hypothesis that the means are 743 * equal can be rejected with confidence <code>1 - alpha</code>. To 744 * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test 745 * without the assumption of equal subpopulation variances, use 746 * {@link #tTest(double[], double[], double)}.</p> 747 * <p> 748 * A pooled variance estimate is used to compute the t-statistic. See 749 * {@link #t(double[], double[])} for the formula. 
The sum of the sample 750 * sizes minus 2 is used as the degrees of freedom.</p> 751 * <p> 752 * <strong>Examples:</strong><br><ol> 753 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 754 * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code> 755 * </li> 756 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code> 757 * at the 99% level, first verify that the measured mean of 758 * <code>sample 1</code> is less than the mean of <code>sample 2</code> 759 * and then use 760 * <br><code>tTest(sample1, sample2, 0.02) </code> 761 * </li></ol> 762 * <p> 763 * <strong>Usage Note:</strong><br> 764 * The validity of the test depends on the assumptions of the parametric 765 * t-test procedure, as discussed 766 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 767 * here</a></p> 768 * <p> 769 * <strong>Preconditions</strong>: <ul> 770 * <li>The observed array lengths must both be at least 2. 771 * </li> 772 * <li> <code> 0 < alpha < 0.5 </code> 773 * </li></ul> 774 * 775 * @param sample1 array of sample data values 776 * @param sample2 array of sample data values 777 * @param alpha significance level of the test 778 * @return true if the null hypothesis can be rejected with 779 * confidence 1 - alpha 780 * @throws NullArgumentException if the arrays are <code>null</code> 781 * @throws NumberIsTooSmallException if the length of the arrays is < 2 782 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 783 * @throws MaxCountExceededException if an error occurs computing the p-value 784 */ 785 public boolean homoscedasticTTest(final double[] sample1, final double[] sample2, 786 final double alpha) 787 throws NullArgumentException, NumberIsTooSmallException, 788 OutOfRangeException, MaxCountExceededException { 789 790 checkSignificanceLevel(alpha); 791 return homoscedasticTTest(sample1, sample2) < alpha; 792 } 793 794 /** 795 * Returns the <i>observed significance 
level</i>, or 796 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 797 * comparing the means of the datasets described by two StatisticalSummary 798 * instances. 799 * <p> 800 * The number returned is the smallest significance level 801 * at which one can reject the null hypothesis that the two means are 802 * equal in favor of the two-sided alternative that they are different. 803 * For a one-sided test, divide the returned value by 2.</p> 804 * <p> 805 * The test does not assume that the underlying population variances are 806 * equal and it uses approximated degrees of freedom computed from the 807 * sample data to compute the p-value. To perform the test assuming 808 * equal variances, use 809 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p> 810 * <p> 811 * <strong>Usage Note:</strong><br> 812 * The validity of the p-value depends on the assumptions of the parametric 813 * t-test procedure, as discussed 814 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 815 * here</a></p> 816 * <p> 817 * <strong>Preconditions</strong>: <ul> 818 * <li>The datasets described by the two Univariates must each contain 819 * at least 2 observations. 
820 * </li></ul> 821 * 822 * @param sampleStats1 StatisticalSummary describing data from the first sample 823 * @param sampleStats2 StatisticalSummary describing data from the second sample 824 * @return p-value for t-test 825 * @throws NullArgumentException if the sample statistics are <code>null</code> 826 * @throws NumberIsTooSmallException if the number of samples is < 2 827 * @throws MaxCountExceededException if an error occurs computing the p-value 828 */ 829 public double tTest(final StatisticalSummary sampleStats1, 830 final StatisticalSummary sampleStats2) 831 throws NullArgumentException, NumberIsTooSmallException, 832 MaxCountExceededException { 833 834 checkSampleData(sampleStats1); 835 checkSampleData(sampleStats2); 836 return tTest(sampleStats1.getMean(), sampleStats2.getMean(), 837 sampleStats1.getVariance(), sampleStats2.getVariance(), 838 sampleStats1.getN(), sampleStats2.getN()); 839 } 840 841 /** 842 * Returns the <i>observed significance level</i>, or 843 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 844 * comparing the means of the datasets described by two StatisticalSummary 845 * instances, under the hypothesis of equal subpopulation variances. To 846 * perform a test without the equal variances assumption, use 847 * {@link #tTest(StatisticalSummary, StatisticalSummary)}. 848 * <p> 849 * The number returned is the smallest significance level 850 * at which one can reject the null hypothesis that the two means are 851 * equal in favor of the two-sided alternative that they are different. 852 * For a one-sided test, divide the returned value by 2.</p> 853 * <p> 854 * See {@link #homoscedasticT(double[], double[])} for the formula used to 855 * compute the t-statistic. 
The sum of the sample sizes minus 2 is used as 856 * the degrees of freedom.</p> 857 * <p> 858 * <strong>Usage Note:</strong><br> 859 * The validity of the p-value depends on the assumptions of the parametric 860 * t-test procedure, as discussed 861 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> 862 * </p><p> 863 * <strong>Preconditions</strong>: <ul> 864 * <li>The datasets described by the two Univariates must each contain 865 * at least 2 observations. 866 * </li></ul> 867 * 868 * @param sampleStats1 StatisticalSummary describing data from the first sample 869 * @param sampleStats2 StatisticalSummary describing data from the second sample 870 * @return p-value for t-test 871 * @throws NullArgumentException if the sample statistics are <code>null</code> 872 * @throws NumberIsTooSmallException if the number of samples is < 2 873 * @throws MaxCountExceededException if an error occurs computing the p-value 874 */ 875 public double homoscedasticTTest(final StatisticalSummary sampleStats1, 876 final StatisticalSummary sampleStats2) 877 throws NullArgumentException, NumberIsTooSmallException, 878 MaxCountExceededException { 879 880 checkSampleData(sampleStats1); 881 checkSampleData(sampleStats2); 882 return homoscedasticTTest(sampleStats1.getMean(), 883 sampleStats2.getMean(), 884 sampleStats1.getVariance(), 885 sampleStats2.getVariance(), 886 sampleStats1.getN(), sampleStats2.getN()); 887 } 888 889 /** 890 * Performs a 891 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 892 * two-sided t-test</a> evaluating the null hypothesis that 893 * <code>sampleStats1</code> and <code>sampleStats2</code> describe 894 * datasets drawn from populations with the same mean, with significance 895 * level <code>alpha</code>. This test does not assume that the 896 * subpopulation variances are equal. 
To perform the test under the equal 897 * variances assumption, use 898 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}. 899 * <p> 900 * Returns <code>true</code> iff the null hypothesis that the means are 901 * equal can be rejected with confidence <code>1 - alpha</code>. To 902 * perform a 1-sided test, use <code>alpha * 2</code></p> 903 * <p> 904 * See {@link #t(double[], double[])} for the formula used to compute the 905 * t-statistic. Degrees of freedom are approximated using the 906 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> 907 * Welch-Satterthwaite approximation.</a></p> 908 * <p> 909 * <strong>Examples:</strong><br><ol> 910 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 911 * the 95%, use 912 * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code> 913 * </li> 914 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> 915 * at the 99% level, first verify that the measured mean of 916 * <code>sample 1</code> is less than the mean of <code>sample 2</code> 917 * and then use 918 * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code> 919 * </li></ol> 920 * <p> 921 * <strong>Usage Note:</strong><br> 922 * The validity of the test depends on the assumptions of the parametric 923 * t-test procedure, as discussed 924 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 925 * here</a></p> 926 * <p> 927 * <strong>Preconditions</strong>: <ul> 928 * <li>The datasets described by the two Univariates must each contain 929 * at least 2 observations. 
930 * </li> 931 * <li> <code> 0 < alpha < 0.5 </code> 932 * </li></ul> 933 * 934 * @param sampleStats1 StatisticalSummary describing sample data values 935 * @param sampleStats2 StatisticalSummary describing sample data values 936 * @param alpha significance level of the test 937 * @return true if the null hypothesis can be rejected with 938 * confidence 1 - alpha 939 * @throws NullArgumentException if the sample statistics are <code>null</code> 940 * @throws NumberIsTooSmallException if the number of samples is < 2 941 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 942 * @throws MaxCountExceededException if an error occurs computing the p-value 943 */ 944 public boolean tTest(final StatisticalSummary sampleStats1, 945 final StatisticalSummary sampleStats2, 946 final double alpha) 947 throws NullArgumentException, NumberIsTooSmallException, 948 OutOfRangeException, MaxCountExceededException { 949 950 checkSignificanceLevel(alpha); 951 return tTest(sampleStats1, sampleStats2) < alpha; 952 } 953 954 //----------------------------------------------- Protected methods 955 956 /** 957 * Computes approximate degrees of freedom for 2-sample t-test. 958 * 959 * @param v1 first sample variance 960 * @param v2 second sample variance 961 * @param n1 first sample n 962 * @param n2 second sample n 963 * @return approximate degrees of freedom 964 */ 965 protected double df(double v1, double v2, double n1, double n2) { 966 return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) / 967 ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) / 968 (n2 * n2 * (n2 - 1d))); 969 } 970 971 /** 972 * Computes t test statistic for 1-sample t-test. 
973 * 974 * @param m sample mean 975 * @param mu constant to test against 976 * @param v sample variance 977 * @param n sample n 978 * @return t test statistic 979 */ 980 protected double t(final double m, final double mu, 981 final double v, final double n) { 982 return (m - mu) / JdkMath.sqrt(v / n); 983 } 984 985 /** 986 * Computes t test statistic for 2-sample t-test. 987 * <p> 988 * Does not assume that subpopulation variances are equal.</p> 989 * 990 * @param m1 first sample mean 991 * @param m2 second sample mean 992 * @param v1 first sample variance 993 * @param v2 second sample variance 994 * @param n1 first sample n 995 * @param n2 second sample n 996 * @return t test statistic 997 */ 998 protected double t(final double m1, final double m2, 999 final double v1, final double v2, 1000 final double n1, final double n2) { 1001 return (m1 - m2) / JdkMath.sqrt((v1 / n1) + (v2 / n2)); 1002 } 1003 1004 /** 1005 * Computes t test statistic for 2-sample t-test under the hypothesis 1006 * of equal subpopulation variances. 1007 * 1008 * @param m1 first sample mean 1009 * @param m2 second sample mean 1010 * @param v1 first sample variance 1011 * @param v2 second sample variance 1012 * @param n1 first sample n 1013 * @param n2 second sample n 1014 * @return t test statistic 1015 */ 1016 protected double homoscedasticT(final double m1, final double m2, 1017 final double v1, final double v2, 1018 final double n1, final double n2) { 1019 final double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2); 1020 return (m1 - m2) / JdkMath.sqrt(pooledVariance * (1d / n1 + 1d / n2)); 1021 } 1022 1023 /** 1024 * Computes p-value for 2-sided, 1-sample t-test. 
1025 * 1026 * @param m sample mean 1027 * @param mu constant to test against 1028 * @param v sample variance 1029 * @param n sample n 1030 * @return p-value 1031 * @throws MaxCountExceededException if an error occurs computing the p-value 1032 * @throws MathIllegalArgumentException if n is not greater than 1 1033 */ 1034 protected double tTest(final double m, final double mu, 1035 final double v, final double n) 1036 throws MaxCountExceededException, MathIllegalArgumentException { 1037 1038 final double t = JdkMath.abs(t(m, mu, v, n)); 1039 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution 1040 final TDistribution distribution = TDistribution.of(n - 1); 1041 return 2.0 * distribution.cumulativeProbability(-t); 1042 } 1043 1044 /** 1045 * Computes p-value for 2-sided, 2-sample t-test. 1046 * <p> 1047 * Does not assume subpopulation variances are equal. Degrees of freedom 1048 * are estimated from the data.</p> 1049 * 1050 * @param m1 first sample mean 1051 * @param m2 second sample mean 1052 * @param v1 first sample variance 1053 * @param v2 second sample variance 1054 * @param n1 first sample n 1055 * @param n2 second sample n 1056 * @return p-value 1057 * @throws MaxCountExceededException if an error occurs computing the p-value 1058 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not 1059 * strictly positive 1060 */ 1061 protected double tTest(final double m1, final double m2, 1062 final double v1, final double v2, 1063 final double n1, final double n2) 1064 throws MaxCountExceededException, NotStrictlyPositiveException { 1065 1066 final double t = JdkMath.abs(t(m1, m2, v1, v2, n1, n2)); 1067 final double degreesOfFreedom = df(v1, v2, n1, n2); 1068 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution 1069 final TDistribution distribution = TDistribution.of(degreesOfFreedom); 1070 return 2.0 * distribution.cumulativeProbability(-t); 1071 } 1072 1073 /** 
1074 * Computes p-value for 2-sided, 2-sample t-test, under the assumption 1075 * of equal subpopulation variances. 1076 * <p> 1077 * The sum of the sample sizes minus 2 is used as degrees of freedom.</p> 1078 * 1079 * @param m1 first sample mean 1080 * @param m2 second sample mean 1081 * @param v1 first sample variance 1082 * @param v2 second sample variance 1083 * @param n1 first sample n 1084 * @param n2 second sample n 1085 * @return p-value 1086 * @throws MaxCountExceededException if an error occurs computing the p-value 1087 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not 1088 * strictly positive 1089 */ 1090 protected double homoscedasticTTest(double m1, double m2, 1091 double v1, double v2, 1092 double n1, double n2) 1093 throws MaxCountExceededException, NotStrictlyPositiveException { 1094 1095 final double t = JdkMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2)); 1096 final double degreesOfFreedom = n1 + n2 - 2; 1097 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution 1098 final TDistribution distribution = TDistribution.of(degreesOfFreedom); 1099 return 2.0 * distribution.cumulativeProbability(-t); 1100 } 1101 1102 /** 1103 * Check significance level. 1104 * 1105 * @param alpha significance level 1106 * @throws OutOfRangeException if the significance level is out of bounds. 1107 */ 1108 private void checkSignificanceLevel(final double alpha) 1109 throws OutOfRangeException { 1110 1111 if (alpha <= 0 || alpha > 0.5) { 1112 throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL, 1113 alpha, 0.0, 0.5); 1114 } 1115 } 1116 1117 /** 1118 * Check sample data. 1119 * 1120 * @param data Sample data. 1121 * @throws NullArgumentException if {@code data} is {@code null}. 1122 * @throws NumberIsTooSmallException if there is not enough sample data. 
1123 */ 1124 private void checkSampleData(final double[] data) 1125 throws NullArgumentException, NumberIsTooSmallException { 1126 1127 if (data == null) { 1128 throw new NullArgumentException(); 1129 } 1130 if (data.length < 2) { 1131 throw new NumberIsTooSmallException( 1132 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC, 1133 data.length, 2, true); 1134 } 1135 } 1136 1137 /** 1138 * Check sample data. 1139 * 1140 * @param stat Statistical summary. 1141 * @throws NullArgumentException if {@code data} is {@code null}. 1142 * @throws NumberIsTooSmallException if there is not enough sample data. 1143 */ 1144 private void checkSampleData(final StatisticalSummary stat) 1145 throws NullArgumentException, NumberIsTooSmallException { 1146 1147 if (stat == null) { 1148 throw new NullArgumentException(); 1149 } 1150 if (stat.getN() < 2) { 1151 throw new NumberIsTooSmallException( 1152 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC, 1153 stat.getN(), 2, true); 1154 } 1155 } 1156 }