001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math3.stat.inference; 018 019import org.apache.commons.math3.distribution.TDistribution; 020import org.apache.commons.math3.exception.DimensionMismatchException; 021import org.apache.commons.math3.exception.MathIllegalArgumentException; 022import org.apache.commons.math3.exception.MaxCountExceededException; 023import org.apache.commons.math3.exception.NoDataException; 024import org.apache.commons.math3.exception.NotStrictlyPositiveException; 025import org.apache.commons.math3.exception.NullArgumentException; 026import org.apache.commons.math3.exception.NumberIsTooSmallException; 027import org.apache.commons.math3.exception.OutOfRangeException; 028import org.apache.commons.math3.exception.util.LocalizedFormats; 029import org.apache.commons.math3.stat.StatUtils; 030import org.apache.commons.math3.stat.descriptive.StatisticalSummary; 031import org.apache.commons.math3.util.FastMath; 032 033/** 034 * An implementation for Student's t-tests. 
035 * <p> 036 * Tests can be:<ul> 037 * <li>One-sample or two-sample</li> 038 * <li>One-sided or two-sided</li> 039 * <li>Paired or unpaired (for two-sample tests)</li> 040 * <li>Homoscedastic (equal variance assumption) or heteroscedastic 041 * (for two sample tests)</li> 042 * <li>Fixed significance level (boolean-valued) or returning p-values. 043 * </li></ul></p> 044 * <p> 045 * Test statistics are available for all tests. Methods including "Test" in 046 * their names perform tests, all other methods return t-statistics. Among 047 * the "Test" methods, <code>double-</code>valued methods return p-values; 048 * <code>boolean-</code>valued methods perform fixed significance level tests. 049 * Significance levels are always specified as numbers between 0 and 0.5 050 * (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p> 051 * <p> 052 * Input to tests can be either <code>double[]</code> arrays or 053 * {@link StatisticalSummary} instances.</p><p> 054 * Uses commons-math {@link org.apache.commons.math3.distribution.TDistribution} 055 * implementation to estimate exact p-values.</p> 056 * 057 */ 058public class TTest { 059 /** 060 * Computes a paired, 2-sample t-statistic based on the data in the input 061 * arrays. The t-statistic returned is equivalent to what would be returned by 062 * computing the one-sample t-statistic {@link #t(double, double[])}, with 063 * <code>mu = 0</code> and the sample array consisting of the (signed) 064 * differences between corresponding entries in <code>sample1</code> and 065 * <code>sample2.</code> 066 * <p> 067 * <strong>Preconditions</strong>: <ul> 068 * <li>The input arrays must have the same length and their common length 069 * must be at least 2. 
070 * </li></ul></p> 071 * 072 * @param sample1 array of sample data values 073 * @param sample2 array of sample data values 074 * @return t statistic 075 * @throws NullArgumentException if the arrays are <code>null</code> 076 * @throws NoDataException if the arrays are empty 077 * @throws DimensionMismatchException if the length of the arrays is not equal 078 * @throws NumberIsTooSmallException if the length of the arrays is < 2 079 */ 080 public double pairedT(final double[] sample1, final double[] sample2) 081 throws NullArgumentException, NoDataException, 082 DimensionMismatchException, NumberIsTooSmallException { 083 084 checkSampleData(sample1); 085 checkSampleData(sample2); 086 double meanDifference = StatUtils.meanDifference(sample1, sample2); 087 return t(meanDifference, 0, 088 StatUtils.varianceDifference(sample1, sample2, meanDifference), 089 sample1.length); 090 091 } 092 093 /** 094 * Returns the <i>observed significance level</i>, or 095 * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test 096 * based on the data in the input arrays. 097 * <p> 098 * The number returned is the smallest significance level 099 * at which one can reject the null hypothesis that the mean of the paired 100 * differences is 0 in favor of the two-sided alternative that the mean paired 101 * difference is not equal to 0. 
For a one-sided test, divide the returned 102 * value by 2.</p> 103 * <p> 104 * This test is equivalent to a one-sample t-test computed using 105 * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample 106 * array consisting of the signed differences between corresponding elements of 107 * <code>sample1</code> and <code>sample2.</code></p> 108 * <p> 109 * <strong>Usage Note:</strong><br> 110 * The validity of the p-value depends on the assumptions of the parametric 111 * t-test procedure, as discussed 112 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 113 * here</a></p> 114 * <p> 115 * <strong>Preconditions</strong>: <ul> 116 * <li>The input array lengths must be the same and their common length must 117 * be at least 2. 118 * </li></ul></p> 119 * 120 * @param sample1 array of sample data values 121 * @param sample2 array of sample data values 122 * @return p-value for t-test 123 * @throws NullArgumentException if the arrays are <code>null</code> 124 * @throws NoDataException if the arrays are empty 125 * @throws DimensionMismatchException if the length of the arrays is not equal 126 * @throws NumberIsTooSmallException if the length of the arrays is < 2 127 * @throws MaxCountExceededException if an error occurs computing the p-value 128 */ 129 public double pairedTTest(final double[] sample1, final double[] sample2) 130 throws NullArgumentException, NoDataException, DimensionMismatchException, 131 NumberIsTooSmallException, MaxCountExceededException { 132 133 double meanDifference = StatUtils.meanDifference(sample1, sample2); 134 return tTest(meanDifference, 0, 135 StatUtils.varianceDifference(sample1, sample2, meanDifference), 136 sample1.length); 137 138 } 139 140 /** 141 * Performs a paired t-test evaluating the null hypothesis that the 142 * mean of the paired differences between <code>sample1</code> and 143 * <code>sample2</code> is 0 in favor of the two-sided alternative that the 144 * mean paired 
difference is not equal to 0, with significance level 145 * <code>alpha</code>. 146 * <p> 147 * Returns <code>true</code> iff the null hypothesis can be rejected with 148 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use 149 * <code>alpha * 2</code></p> 150 * <p> 151 * <strong>Usage Note:</strong><br> 152 * The validity of the test depends on the assumptions of the parametric 153 * t-test procedure, as discussed 154 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 155 * here</a></p> 156 * <p> 157 * <strong>Preconditions</strong>: <ul> 158 * <li>The input array lengths must be the same and their common length 159 * must be at least 2. 160 * </li> 161 * <li> <code> 0 < alpha < 0.5 </code> 162 * </li></ul></p> 163 * 164 * @param sample1 array of sample data values 165 * @param sample2 array of sample data values 166 * @param alpha significance level of the test 167 * @return true if the null hypothesis can be rejected with 168 * confidence 1 - alpha 169 * @throws NullArgumentException if the arrays are <code>null</code> 170 * @throws NoDataException if the arrays are empty 171 * @throws DimensionMismatchException if the length of the arrays is not equal 172 * @throws NumberIsTooSmallException if the length of the arrays is < 2 173 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 174 * @throws MaxCountExceededException if an error occurs computing the p-value 175 */ 176 public boolean pairedTTest(final double[] sample1, final double[] sample2, 177 final double alpha) 178 throws NullArgumentException, NoDataException, DimensionMismatchException, 179 NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException { 180 181 checkSignificanceLevel(alpha); 182 return pairedTTest(sample1, sample2) < alpha; 183 184 } 185 186 /** 187 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> 188 * t statistic </a> given observed values and a 
comparison constant. 189 * <p> 190 * This statistic can be used to perform a one sample t-test for the mean. 191 * </p><p> 192 * <strong>Preconditions</strong>: <ul> 193 * <li>The observed array length must be at least 2. 194 * </li></ul></p> 195 * 196 * @param mu comparison constant 197 * @param observed array of values 198 * @return t statistic 199 * @throws NullArgumentException if <code>observed</code> is <code>null</code> 200 * @throws NumberIsTooSmallException if the length of <code>observed</code> is < 2 201 */ 202 public double t(final double mu, final double[] observed) 203 throws NullArgumentException, NumberIsTooSmallException { 204 205 checkSampleData(observed); 206 // No try-catch or advertised exception because args have just been checked 207 return t(StatUtils.mean(observed), mu, StatUtils.variance(observed), 208 observed.length); 209 210 } 211 212 /** 213 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> 214 * t statistic </a> to use in comparing the mean of the dataset described by 215 * <code>sampleStats</code> to <code>mu</code>. 216 * <p> 217 * This statistic can be used to perform a one sample t-test for the mean. 218 * </p><p> 219 * <strong>Preconditions</strong>: <ul> 220 * <li><code>observed.getN() ≥ 2</code>. 
221 * </li></ul></p> 222 * 223 * @param mu comparison constant 224 * @param sampleStats DescriptiveStatistics holding sample summary statitstics 225 * @return t statistic 226 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 227 * @throws NumberIsTooSmallException if the number of samples is < 2 228 */ 229 public double t(final double mu, final StatisticalSummary sampleStats) 230 throws NullArgumentException, NumberIsTooSmallException { 231 232 checkSampleData(sampleStats); 233 return t(sampleStats.getMean(), mu, sampleStats.getVariance(), 234 sampleStats.getN()); 235 236 } 237 238 /** 239 * Computes a 2-sample t statistic, under the hypothesis of equal 240 * subpopulation variances. To compute a t-statistic without the 241 * equal variances hypothesis, use {@link #t(double[], double[])}. 242 * <p> 243 * This statistic can be used to perform a (homoscedastic) two-sample 244 * t-test to compare sample means.</p> 245 * <p> 246 * The t-statistic is</p> 247 * <p> 248 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code> 249 * </p><p> 250 * where <strong><code>n1</code></strong> is the size of first sample; 251 * <strong><code> n2</code></strong> is the size of second sample; 252 * <strong><code> m1</code></strong> is the mean of first sample; 253 * <strong><code> m2</code></strong> is the mean of second sample</li> 254 * </ul> 255 * and <strong><code>var</code></strong> is the pooled variance estimate: 256 * </p><p> 257 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code> 258 * </p><p> 259 * with <strong><code>var1</code></strong> the variance of the first sample and 260 * <strong><code>var2</code></strong> the variance of the second sample. 261 * </p><p> 262 * <strong>Preconditions</strong>: <ul> 263 * <li>The observed array lengths must both be at least 2. 
264 * </li></ul></p> 265 * 266 * @param sample1 array of sample data values 267 * @param sample2 array of sample data values 268 * @return t statistic 269 * @throws NullArgumentException if the arrays are <code>null</code> 270 * @throws NumberIsTooSmallException if the length of the arrays is < 2 271 */ 272 public double homoscedasticT(final double[] sample1, final double[] sample2) 273 throws NullArgumentException, NumberIsTooSmallException { 274 275 checkSampleData(sample1); 276 checkSampleData(sample2); 277 // No try-catch or advertised exception because args have just been checked 278 return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2), 279 StatUtils.variance(sample1), StatUtils.variance(sample2), 280 sample1.length, sample2.length); 281 282 } 283 284 /** 285 * Computes a 2-sample t statistic, without the hypothesis of equal 286 * subpopulation variances. To compute a t-statistic assuming equal 287 * variances, use {@link #homoscedasticT(double[], double[])}. 288 * <p> 289 * This statistic can be used to perform a two-sample t-test to compare 290 * sample means.</p> 291 * <p> 292 * The t-statistic is</p> 293 * <p> 294 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> 295 * </p><p> 296 * where <strong><code>n1</code></strong> is the size of the first sample 297 * <strong><code> n2</code></strong> is the size of the second sample; 298 * <strong><code> m1</code></strong> is the mean of the first sample; 299 * <strong><code> m2</code></strong> is the mean of the second sample; 300 * <strong><code> var1</code></strong> is the variance of the first sample; 301 * <strong><code> var2</code></strong> is the variance of the second sample; 302 * </p><p> 303 * <strong>Preconditions</strong>: <ul> 304 * <li>The observed array lengths must both be at least 2. 
305 * </li></ul></p> 306 * 307 * @param sample1 array of sample data values 308 * @param sample2 array of sample data values 309 * @return t statistic 310 * @throws NullArgumentException if the arrays are <code>null</code> 311 * @throws NumberIsTooSmallException if the length of the arrays is < 2 312 */ 313 public double t(final double[] sample1, final double[] sample2) 314 throws NullArgumentException, NumberIsTooSmallException { 315 316 checkSampleData(sample1); 317 checkSampleData(sample2); 318 // No try-catch or advertised exception because args have just been checked 319 return t(StatUtils.mean(sample1), StatUtils.mean(sample2), 320 StatUtils.variance(sample1), StatUtils.variance(sample2), 321 sample1.length, sample2.length); 322 323 } 324 325 /** 326 * Computes a 2-sample t statistic </a>, comparing the means of the datasets 327 * described by two {@link StatisticalSummary} instances, without the 328 * assumption of equal subpopulation variances. Use 329 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to 330 * compute a t-statistic under the equal variances assumption. 331 * <p> 332 * This statistic can be used to perform a two-sample t-test to compare 333 * sample means.</p> 334 * <p> 335 * The returned t-statistic is</p> 336 * <p> 337 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> 338 * </p><p> 339 * where <strong><code>n1</code></strong> is the size of the first sample; 340 * <strong><code> n2</code></strong> is the size of the second sample; 341 * <strong><code> m1</code></strong> is the mean of the first sample; 342 * <strong><code> m2</code></strong> is the mean of the second sample 343 * <strong><code> var1</code></strong> is the variance of the first sample; 344 * <strong><code> var2</code></strong> is the variance of the second sample 345 * </p><p> 346 * <strong>Preconditions</strong>: <ul> 347 * <li>The datasets described by the two Univariates must each contain 348 * at least 2 observations. 
349 * </li></ul></p> 350 * 351 * @param sampleStats1 StatisticalSummary describing data from the first sample 352 * @param sampleStats2 StatisticalSummary describing data from the second sample 353 * @return t statistic 354 * @throws NullArgumentException if the sample statistics are <code>null</code> 355 * @throws NumberIsTooSmallException if the number of samples is < 2 356 */ 357 public double t(final StatisticalSummary sampleStats1, 358 final StatisticalSummary sampleStats2) 359 throws NullArgumentException, NumberIsTooSmallException { 360 361 checkSampleData(sampleStats1); 362 checkSampleData(sampleStats2); 363 return t(sampleStats1.getMean(), sampleStats2.getMean(), 364 sampleStats1.getVariance(), sampleStats2.getVariance(), 365 sampleStats1.getN(), sampleStats2.getN()); 366 367 } 368 369 /** 370 * Computes a 2-sample t statistic, comparing the means of the datasets 371 * described by two {@link StatisticalSummary} instances, under the 372 * assumption of equal subpopulation variances. To compute a t-statistic 373 * without the equal variances assumption, use 374 * {@link #t(StatisticalSummary, StatisticalSummary)}. 
375 * <p> 376 * This statistic can be used to perform a (homoscedastic) two-sample 377 * t-test to compare sample means.</p> 378 * <p> 379 * The t-statistic returned is</p> 380 * <p> 381 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code> 382 * </p><p> 383 * where <strong><code>n1</code></strong> is the size of first sample; 384 * <strong><code> n2</code></strong> is the size of second sample; 385 * <strong><code> m1</code></strong> is the mean of first sample; 386 * <strong><code> m2</code></strong> is the mean of second sample 387 * and <strong><code>var</code></strong> is the pooled variance estimate: 388 * </p><p> 389 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code> 390 * </p><p> 391 * with <strong><code>var1</code></strong> the variance of the first sample and 392 * <strong><code>var2</code></strong> the variance of the second sample. 393 * </p><p> 394 * <strong>Preconditions</strong>: <ul> 395 * <li>The datasets described by the two Univariates must each contain 396 * at least 2 observations. 
397 * </li></ul></p> 398 * 399 * @param sampleStats1 StatisticalSummary describing data from the first sample 400 * @param sampleStats2 StatisticalSummary describing data from the second sample 401 * @return t statistic 402 * @throws NullArgumentException if the sample statistics are <code>null</code> 403 * @throws NumberIsTooSmallException if the number of samples is < 2 404 */ 405 public double homoscedasticT(final StatisticalSummary sampleStats1, 406 final StatisticalSummary sampleStats2) 407 throws NullArgumentException, NumberIsTooSmallException { 408 409 checkSampleData(sampleStats1); 410 checkSampleData(sampleStats2); 411 return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(), 412 sampleStats1.getVariance(), sampleStats2.getVariance(), 413 sampleStats1.getN(), sampleStats2.getN()); 414 415 } 416 417 /** 418 * Returns the <i>observed significance level</i>, or 419 * <i>p-value</i>, associated with a one-sample, two-tailed t-test 420 * comparing the mean of the input array with the constant <code>mu</code>. 421 * <p> 422 * The number returned is the smallest significance level 423 * at which one can reject the null hypothesis that the mean equals 424 * <code>mu</code> in favor of the two-sided alternative that the mean 425 * is different from <code>mu</code>. For a one-sided test, divide the 426 * returned value by 2.</p> 427 * <p> 428 * <strong>Usage Note:</strong><br> 429 * The validity of the test depends on the assumptions of the parametric 430 * t-test procedure, as discussed 431 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> 432 * </p><p> 433 * <strong>Preconditions</strong>: <ul> 434 * <li>The observed array length must be at least 2. 
435 * </li></ul></p> 436 * 437 * @param mu constant value to compare sample mean against 438 * @param sample array of sample data values 439 * @return p-value 440 * @throws NullArgumentException if the sample array is <code>null</code> 441 * @throws NumberIsTooSmallException if the length of the array is < 2 442 * @throws MaxCountExceededException if an error occurs computing the p-value 443 */ 444 public double tTest(final double mu, final double[] sample) 445 throws NullArgumentException, NumberIsTooSmallException, 446 MaxCountExceededException { 447 448 checkSampleData(sample); 449 // No try-catch or advertised exception because args have just been checked 450 return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample), 451 sample.length); 452 453 } 454 455 /** 456 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 457 * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from 458 * which <code>sample</code> is drawn equals <code>mu</code>. 459 * <p> 460 * Returns <code>true</code> iff the null hypothesis can be 461 * rejected with confidence <code>1 - alpha</code>. 
To 462 * perform a 1-sided test, use <code>alpha * 2</code></p> 463 * <p> 464 * <strong>Examples:</strong><br><ol> 465 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at 466 * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code> 467 * </li> 468 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> 469 * at the 99% level, first verify that the measured sample mean is less 470 * than <code>mu</code> and then use 471 * <br><code>tTest(mu, sample, 0.02) </code> 472 * </li></ol></p> 473 * <p> 474 * <strong>Usage Note:</strong><br> 475 * The validity of the test depends on the assumptions of the one-sample 476 * parametric t-test procedure, as discussed 477 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> 478 * </p><p> 479 * <strong>Preconditions</strong>: <ul> 480 * <li>The observed array length must be at least 2. 481 * </li></ul></p> 482 * 483 * @param mu constant value to compare sample mean against 484 * @param sample array of sample data values 485 * @param alpha significance level of the test 486 * @return p-value 487 * @throws NullArgumentException if the sample array is <code>null</code> 488 * @throws NumberIsTooSmallException if the length of the array is < 2 489 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 490 * @throws MaxCountExceededException if an error computing the p-value 491 */ 492 public boolean tTest(final double mu, final double[] sample, final double alpha) 493 throws NullArgumentException, NumberIsTooSmallException, 494 OutOfRangeException, MaxCountExceededException { 495 496 checkSignificanceLevel(alpha); 497 return tTest(mu, sample) < alpha; 498 499 } 500 501 /** 502 * Returns the <i>observed significance level</i>, or 503 * <i>p-value</i>, associated with a one-sample, two-tailed t-test 504 * comparing the mean of the dataset described by <code>sampleStats</code> 505 * with the constant <code>mu</code>. 
506 * <p> 507 * The number returned is the smallest significance level 508 * at which one can reject the null hypothesis that the mean equals 509 * <code>mu</code> in favor of the two-sided alternative that the mean 510 * is different from <code>mu</code>. For a one-sided test, divide the 511 * returned value by 2.</p> 512 * <p> 513 * <strong>Usage Note:</strong><br> 514 * The validity of the test depends on the assumptions of the parametric 515 * t-test procedure, as discussed 516 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 517 * here</a></p> 518 * <p> 519 * <strong>Preconditions</strong>: <ul> 520 * <li>The sample must contain at least 2 observations. 521 * </li></ul></p> 522 * 523 * @param mu constant value to compare sample mean against 524 * @param sampleStats StatisticalSummary describing sample data 525 * @return p-value 526 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 527 * @throws NumberIsTooSmallException if the number of samples is < 2 528 * @throws MaxCountExceededException if an error occurs computing the p-value 529 */ 530 public double tTest(final double mu, final StatisticalSummary sampleStats) 531 throws NullArgumentException, NumberIsTooSmallException, 532 MaxCountExceededException { 533 534 checkSampleData(sampleStats); 535 return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), 536 sampleStats.getN()); 537 538 } 539 540 /** 541 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 542 * two-sided t-test</a> evaluating the null hypothesis that the mean of the 543 * population from which the dataset described by <code>stats</code> is 544 * drawn equals <code>mu</code>. 545 * <p> 546 * Returns <code>true</code> iff the null hypothesis can be rejected with 547 * confidence <code>1 - alpha</code>. 
To perform a 1-sided test, use 548 * <code>alpha * 2.</code></p> 549 * <p> 550 * <strong>Examples:</strong><br><ol> 551 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at 552 * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code> 553 * </li> 554 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> 555 * at the 99% level, first verify that the measured sample mean is less 556 * than <code>mu</code> and then use 557 * <br><code>tTest(mu, sampleStats, 0.02) </code> 558 * </li></ol></p> 559 * <p> 560 * <strong>Usage Note:</strong><br> 561 * The validity of the test depends on the assumptions of the one-sample 562 * parametric t-test procedure, as discussed 563 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> 564 * </p><p> 565 * <strong>Preconditions</strong>: <ul> 566 * <li>The sample must include at least 2 observations. 567 * </li></ul></p> 568 * 569 * @param mu constant value to compare sample mean against 570 * @param sampleStats StatisticalSummary describing sample data values 571 * @param alpha significance level of the test 572 * @return p-value 573 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 574 * @throws NumberIsTooSmallException if the number of samples is < 2 575 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 576 * @throws MaxCountExceededException if an error occurs computing the p-value 577 */ 578 public boolean tTest(final double mu, final StatisticalSummary sampleStats, 579 final double alpha) 580 throws NullArgumentException, NumberIsTooSmallException, 581 OutOfRangeException, MaxCountExceededException { 582 583 checkSignificanceLevel(alpha); 584 return tTest(mu, sampleStats) < alpha; 585 586 } 587 588 /** 589 * Returns the <i>observed significance level</i>, or 590 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 591 * comparing the means of the input arrays. 
592 * <p> 593 * The number returned is the smallest significance level 594 * at which one can reject the null hypothesis that the two means are 595 * equal in favor of the two-sided alternative that they are different. 596 * For a one-sided test, divide the returned value by 2.</p> 597 * <p> 598 * The test does not assume that the underlying popuation variances are 599 * equal and it uses approximated degrees of freedom computed from the 600 * sample data to compute the p-value. The t-statistic used is as defined in 601 * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation 602 * to the degrees of freedom is used, 603 * as described 604 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> 605 * here.</a> To perform the test under the assumption of equal subpopulation 606 * variances, use {@link #homoscedasticTTest(double[], double[])}.</p> 607 * <p> 608 * <strong>Usage Note:</strong><br> 609 * The validity of the p-value depends on the assumptions of the parametric 610 * t-test procedure, as discussed 611 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 612 * here</a></p> 613 * <p> 614 * <strong>Preconditions</strong>: <ul> 615 * <li>The observed array lengths must both be at least 2. 
616 * </li></ul></p> 617 * 618 * @param sample1 array of sample data values 619 * @param sample2 array of sample data values 620 * @return p-value for t-test 621 * @throws NullArgumentException if the arrays are <code>null</code> 622 * @throws NumberIsTooSmallException if the length of the arrays is < 2 623 * @throws MaxCountExceededException if an error occurs computing the p-value 624 */ 625 public double tTest(final double[] sample1, final double[] sample2) 626 throws NullArgumentException, NumberIsTooSmallException, 627 MaxCountExceededException { 628 629 checkSampleData(sample1); 630 checkSampleData(sample2); 631 // No try-catch or advertised exception because args have just been checked 632 return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2), 633 StatUtils.variance(sample1), StatUtils.variance(sample2), 634 sample1.length, sample2.length); 635 636 } 637 638 /** 639 * Returns the <i>observed significance level</i>, or 640 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 641 * comparing the means of the input arrays, under the assumption that 642 * the two samples are drawn from subpopulations with equal variances. 643 * To perform the test without the equal variances assumption, use 644 * {@link #tTest(double[], double[])}.</p> 645 * <p> 646 * The number returned is the smallest significance level 647 * at which one can reject the null hypothesis that the two means are 648 * equal in favor of the two-sided alternative that they are different. 649 * For a one-sided test, divide the returned value by 2.</p> 650 * <p> 651 * A pooled variance estimate is used to compute the t-statistic. See 652 * {@link #homoscedasticT(double[], double[])}. 
The sum of the sample sizes 653 * minus 2 is used as the degrees of freedom.</p> 654 * <p> 655 * <strong>Usage Note:</strong><br> 656 * The validity of the p-value depends on the assumptions of the parametric 657 * t-test procedure, as discussed 658 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 659 * here</a></p> 660 * <p> 661 * <strong>Preconditions</strong>: <ul> 662 * <li>The observed array lengths must both be at least 2. 663 * </li></ul></p> 664 * 665 * @param sample1 array of sample data values 666 * @param sample2 array of sample data values 667 * @return p-value for t-test 668 * @throws NullArgumentException if the arrays are <code>null</code> 669 * @throws NumberIsTooSmallException if the length of the arrays is < 2 670 * @throws MaxCountExceededException if an error occurs computing the p-value 671 */ 672 public double homoscedasticTTest(final double[] sample1, final double[] sample2) 673 throws NullArgumentException, NumberIsTooSmallException, 674 MaxCountExceededException { 675 676 checkSampleData(sample1); 677 checkSampleData(sample2); 678 // No try-catch or advertised exception because args have just been checked 679 return homoscedasticTTest(StatUtils.mean(sample1), 680 StatUtils.mean(sample2), 681 StatUtils.variance(sample1), 682 StatUtils.variance(sample2), 683 sample1.length, sample2.length); 684 685 } 686 687 /** 688 * Performs a 689 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 690 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 691 * and <code>sample2</code> are drawn from populations with the same mean, 692 * with significance level <code>alpha</code>. This test does not assume 693 * that the subpopulation variances are equal. To perform the test assuming 694 * equal variances, use 695 * {@link #homoscedasticTTest(double[], double[], double)}. 
696 * <p> 697 * Returns <code>true</code> iff the null hypothesis that the means are 698 * equal can be rejected with confidence <code>1 - alpha</code>. To 699 * perform a 1-sided test, use <code>alpha * 2</code></p> 700 * <p> 701 * See {@link #t(double[], double[])} for the formula used to compute the 702 * t-statistic. Degrees of freedom are approximated using the 703 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> 704 * Welch-Satterthwaite approximation.</a></p> 705 * <p> 706 * <strong>Examples:</strong><br><ol> 707 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 708 * the 95% level, use 709 * <br><code>tTest(sample1, sample2, 0.05). </code> 710 * </li> 711 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>, 712 * at the 99% level, first verify that the measured mean of <code>sample 1</code> 713 * is less than the mean of <code>sample 2</code> and then use 714 * <br><code>tTest(sample1, sample2, 0.02) </code> 715 * </li></ol></p> 716 * <p> 717 * <strong>Usage Note:</strong><br> 718 * The validity of the test depends on the assumptions of the parametric 719 * t-test procedure, as discussed 720 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 721 * here</a></p> 722 * <p> 723 * <strong>Preconditions</strong>: <ul> 724 * <li>The observed array lengths must both be at least 2. 
725 * </li> 726 * <li> <code> 0 < alpha < 0.5 </code> 727 * </li></ul></p> 728 * 729 * @param sample1 array of sample data values 730 * @param sample2 array of sample data values 731 * @param alpha significance level of the test 732 * @return true if the null hypothesis can be rejected with 733 * confidence 1 - alpha 734 * @throws NullArgumentException if the arrays are <code>null</code> 735 * @throws NumberIsTooSmallException if the length of the arrays is < 2 736 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 737 * @throws MaxCountExceededException if an error occurs computing the p-value 738 */ 739 public boolean tTest(final double[] sample1, final double[] sample2, 740 final double alpha) 741 throws NullArgumentException, NumberIsTooSmallException, 742 OutOfRangeException, MaxCountExceededException { 743 744 checkSignificanceLevel(alpha); 745 return tTest(sample1, sample2) < alpha; 746 747 } 748 749 /** 750 * Performs a 751 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 752 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 753 * and <code>sample2</code> are drawn from populations with the same mean, 754 * with significance level <code>alpha</code>, assuming that the 755 * subpopulation variances are equal. Use 756 * {@link #tTest(double[], double[], double)} to perform the test without 757 * the assumption of equal variances. 758 * <p> 759 * Returns <code>true</code> iff the null hypothesis that the means are 760 * equal can be rejected with confidence <code>1 - alpha</code>. To 761 * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test 762 * without the assumption of equal subpopulation variances, use 763 * {@link #tTest(double[], double[], double)}.</p> 764 * <p> 765 * A pooled variance estimate is used to compute the t-statistic. See 766 * {@link #t(double[], double[])} for the formula. 
The sum of the sample 767 * sizes minus 2 is used as the degrees of freedom.</p> 768 * <p> 769 * <strong>Examples:</strong><br><ol> 770 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 771 * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code> 772 * </li> 773 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code> 774 * at the 99% level, first verify that the measured mean of 775 * <code>sample 1</code> is less than the mean of <code>sample 2</code> 776 * and then use 777 * <br><code>tTest(sample1, sample2, 0.02) </code> 778 * </li></ol></p> 779 * <p> 780 * <strong>Usage Note:</strong><br> 781 * The validity of the test depends on the assumptions of the parametric 782 * t-test procedure, as discussed 783 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 784 * here</a></p> 785 * <p> 786 * <strong>Preconditions</strong>: <ul> 787 * <li>The observed array lengths must both be at least 2. 788 * </li> 789 * <li> <code> 0 < alpha < 0.5 </code> 790 * </li></ul></p> 791 * 792 * @param sample1 array of sample data values 793 * @param sample2 array of sample data values 794 * @param alpha significance level of the test 795 * @return true if the null hypothesis can be rejected with 796 * confidence 1 - alpha 797 * @throws NullArgumentException if the arrays are <code>null</code> 798 * @throws NumberIsTooSmallException if the length of the arrays is < 2 799 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 800 * @throws MaxCountExceededException if an error occurs computing the p-value 801 */ 802 public boolean homoscedasticTTest(final double[] sample1, final double[] sample2, 803 final double alpha) 804 throws NullArgumentException, NumberIsTooSmallException, 805 OutOfRangeException, MaxCountExceededException { 806 807 checkSignificanceLevel(alpha); 808 return homoscedasticTTest(sample1, sample2) < alpha; 809 810 } 811 812 /** 813 * Returns the <i>observed 
significance level</i>, or
     * <i>p-value</i>, associated with a two-sample, two-tailed t-test
     * comparing the means of the datasets described by two StatisticalSummary
     * instances.
     * <p>
     * The number returned is the smallest significance level
     * at which one can reject the null hypothesis that the two means are
     * equal in favor of the two-sided alternative that they are different.
     * For a one-sided test, divide the returned value by 2.</p>
     * <p>
     * The test does not assume that the underlying population variances are
     * equal and it uses approximated degrees of freedom computed from the
     * sample data to compute the p-value.   To perform the test assuming
     * equal variances, use
     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
     * <p>
     * <strong>Usage Note:</strong><br>
     * The validity of the p-value depends on the assumptions of the parametric
     * t-test procedure, as discussed
     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
     * here</a></p>
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must each contain
     * at least 2 observations.
838 * </li></ul></p> 839 * 840 * @param sampleStats1 StatisticalSummary describing data from the first sample 841 * @param sampleStats2 StatisticalSummary describing data from the second sample 842 * @return p-value for t-test 843 * @throws NullArgumentException if the sample statistics are <code>null</code> 844 * @throws NumberIsTooSmallException if the number of samples is < 2 845 * @throws MaxCountExceededException if an error occurs computing the p-value 846 */ 847 public double tTest(final StatisticalSummary sampleStats1, 848 final StatisticalSummary sampleStats2) 849 throws NullArgumentException, NumberIsTooSmallException, 850 MaxCountExceededException { 851 852 checkSampleData(sampleStats1); 853 checkSampleData(sampleStats2); 854 return tTest(sampleStats1.getMean(), sampleStats2.getMean(), 855 sampleStats1.getVariance(), sampleStats2.getVariance(), 856 sampleStats1.getN(), sampleStats2.getN()); 857 858 } 859 860 /** 861 * Returns the <i>observed significance level</i>, or 862 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 863 * comparing the means of the datasets described by two StatisticalSummary 864 * instances, under the hypothesis of equal subpopulation variances. To 865 * perform a test without the equal variances assumption, use 866 * {@link #tTest(StatisticalSummary, StatisticalSummary)}. 867 * <p> 868 * The number returned is the smallest significance level 869 * at which one can reject the null hypothesis that the two means are 870 * equal in favor of the two-sided alternative that they are different. 871 * For a one-sided test, divide the returned value by 2.</p> 872 * <p> 873 * See {@link #homoscedasticT(double[], double[])} for the formula used to 874 * compute the t-statistic. 
The sum of the sample sizes minus 2 is used as 875 * the degrees of freedom.</p> 876 * <p> 877 * <strong>Usage Note:</strong><br> 878 * The validity of the p-value depends on the assumptions of the parametric 879 * t-test procedure, as discussed 880 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> 881 * </p><p> 882 * <strong>Preconditions</strong>: <ul> 883 * <li>The datasets described by the two Univariates must each contain 884 * at least 2 observations. 885 * </li></ul></p> 886 * 887 * @param sampleStats1 StatisticalSummary describing data from the first sample 888 * @param sampleStats2 StatisticalSummary describing data from the second sample 889 * @return p-value for t-test 890 * @throws NullArgumentException if the sample statistics are <code>null</code> 891 * @throws NumberIsTooSmallException if the number of samples is < 2 892 * @throws MaxCountExceededException if an error occurs computing the p-value 893 */ 894 public double homoscedasticTTest(final StatisticalSummary sampleStats1, 895 final StatisticalSummary sampleStats2) 896 throws NullArgumentException, NumberIsTooSmallException, 897 MaxCountExceededException { 898 899 checkSampleData(sampleStats1); 900 checkSampleData(sampleStats2); 901 return homoscedasticTTest(sampleStats1.getMean(), 902 sampleStats2.getMean(), 903 sampleStats1.getVariance(), 904 sampleStats2.getVariance(), 905 sampleStats1.getN(), sampleStats2.getN()); 906 907 } 908 909 /** 910 * Performs a 911 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 912 * two-sided t-test</a> evaluating the null hypothesis that 913 * <code>sampleStats1</code> and <code>sampleStats2</code> describe 914 * datasets drawn from populations with the same mean, with significance 915 * level <code>alpha</code>. This test does not assume that the 916 * subpopulation variances are equal. 
To perform the test under the equal
     * variances assumption, use
     * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
     * <p>
     * Returns <code>true</code> iff the null hypothesis that the means are
     * equal can be rejected with confidence <code>1 - alpha</code>.  To
     * perform a 1-sided test, use <code>alpha * 2</code>.</p>
     * <p>
     * See {@link #t(double[], double[])} for the formula used to compute the
     * t-statistic.  Degrees of freedom are approximated using the
     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
     * Welch-Satterthwaite approximation.</a></p>
     * <p>
     * <strong>Examples:</strong><br><ol>
     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
     * the 95% level, use
     * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
     * </li>
     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
     * at the 99% level,  first verify that the measured mean of
     * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
     * and then use
     * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
     * </li></ol></p>
     * <p>
     * <strong>Usage Note:</strong><br>
     * The validity of the test depends on the assumptions of the parametric
     * t-test procedure, as discussed
     * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
     * here</a></p>
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must each contain
     * at least 2 observations.
950 * </li> 951 * <li> <code> 0 < alpha < 0.5 </code> 952 * </li></ul></p> 953 * 954 * @param sampleStats1 StatisticalSummary describing sample data values 955 * @param sampleStats2 StatisticalSummary describing sample data values 956 * @param alpha significance level of the test 957 * @return true if the null hypothesis can be rejected with 958 * confidence 1 - alpha 959 * @throws NullArgumentException if the sample statistics are <code>null</code> 960 * @throws NumberIsTooSmallException if the number of samples is < 2 961 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5] 962 * @throws MaxCountExceededException if an error occurs computing the p-value 963 */ 964 public boolean tTest(final StatisticalSummary sampleStats1, 965 final StatisticalSummary sampleStats2, 966 final double alpha) 967 throws NullArgumentException, NumberIsTooSmallException, 968 OutOfRangeException, MaxCountExceededException { 969 970 checkSignificanceLevel(alpha); 971 return tTest(sampleStats1, sampleStats2) < alpha; 972 973 } 974 975 //----------------------------------------------- Protected methods 976 977 /** 978 * Computes approximate degrees of freedom for 2-sample t-test. 979 * 980 * @param v1 first sample variance 981 * @param v2 second sample variance 982 * @param n1 first sample n 983 * @param n2 second sample n 984 * @return approximate degrees of freedom 985 */ 986 protected double df(double v1, double v2, double n1, double n2) { 987 return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) / 988 ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) / 989 (n2 * n2 * (n2 - 1d))); 990 } 991 992 /** 993 * Computes t test statistic for 1-sample t-test. 
994 * 995 * @param m sample mean 996 * @param mu constant to test against 997 * @param v sample variance 998 * @param n sample n 999 * @return t test statistic 1000 */ 1001 protected double t(final double m, final double mu, 1002 final double v, final double n) { 1003 return (m - mu) / FastMath.sqrt(v / n); 1004 } 1005 1006 /** 1007 * Computes t test statistic for 2-sample t-test. 1008 * <p> 1009 * Does not assume that subpopulation variances are equal.</p> 1010 * 1011 * @param m1 first sample mean 1012 * @param m2 second sample mean 1013 * @param v1 first sample variance 1014 * @param v2 second sample variance 1015 * @param n1 first sample n 1016 * @param n2 second sample n 1017 * @return t test statistic 1018 */ 1019 protected double t(final double m1, final double m2, 1020 final double v1, final double v2, 1021 final double n1, final double n2) { 1022 return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2)); 1023 } 1024 1025 /** 1026 * Computes t test statistic for 2-sample t-test under the hypothesis 1027 * of equal subpopulation variances. 1028 * 1029 * @param m1 first sample mean 1030 * @param m2 second sample mean 1031 * @param v1 first sample variance 1032 * @param v2 second sample variance 1033 * @param n1 first sample n 1034 * @param n2 second sample n 1035 * @return t test statistic 1036 */ 1037 protected double homoscedasticT(final double m1, final double m2, 1038 final double v1, final double v2, 1039 final double n1, final double n2) { 1040 final double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2); 1041 return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2)); 1042 } 1043 1044 /** 1045 * Computes p-value for 2-sided, 1-sample t-test. 
1046 * 1047 * @param m sample mean 1048 * @param mu constant to test against 1049 * @param v sample variance 1050 * @param n sample n 1051 * @return p-value 1052 * @throws MaxCountExceededException if an error occurs computing the p-value 1053 * @throws MathIllegalArgumentException if n is not greater than 1 1054 */ 1055 protected double tTest(final double m, final double mu, 1056 final double v, final double n) 1057 throws MaxCountExceededException, MathIllegalArgumentException { 1058 1059 final double t = FastMath.abs(t(m, mu, v, n)); 1060 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution 1061 final TDistribution distribution = new TDistribution(null, n - 1); 1062 return 2.0 * distribution.cumulativeProbability(-t); 1063 1064 } 1065 1066 /** 1067 * Computes p-value for 2-sided, 2-sample t-test. 1068 * <p> 1069 * Does not assume subpopulation variances are equal. Degrees of freedom 1070 * are estimated from the data.</p> 1071 * 1072 * @param m1 first sample mean 1073 * @param m2 second sample mean 1074 * @param v1 first sample variance 1075 * @param v2 second sample variance 1076 * @param n1 first sample n 1077 * @param n2 second sample n 1078 * @return p-value 1079 * @throws MaxCountExceededException if an error occurs computing the p-value 1080 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not 1081 * strictly positive 1082 */ 1083 protected double tTest(final double m1, final double m2, 1084 final double v1, final double v2, 1085 final double n1, final double n2) 1086 throws MaxCountExceededException, NotStrictlyPositiveException { 1087 1088 final double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2)); 1089 final double degreesOfFreedom = df(v1, v2, n1, n2); 1090 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution 1091 final TDistribution distribution = new TDistribution(null, degreesOfFreedom); 1092 return 2.0 * distribution.cumulativeProbability(-t); 
1093 1094 } 1095 1096 /** 1097 * Computes p-value for 2-sided, 2-sample t-test, under the assumption 1098 * of equal subpopulation variances. 1099 * <p> 1100 * The sum of the sample sizes minus 2 is used as degrees of freedom.</p> 1101 * 1102 * @param m1 first sample mean 1103 * @param m2 second sample mean 1104 * @param v1 first sample variance 1105 * @param v2 second sample variance 1106 * @param n1 first sample n 1107 * @param n2 second sample n 1108 * @return p-value 1109 * @throws MaxCountExceededException if an error occurs computing the p-value 1110 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not 1111 * strictly positive 1112 */ 1113 protected double homoscedasticTTest(double m1, double m2, 1114 double v1, double v2, 1115 double n1, double n2) 1116 throws MaxCountExceededException, NotStrictlyPositiveException { 1117 1118 final double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2)); 1119 final double degreesOfFreedom = n1 + n2 - 2; 1120 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution 1121 final TDistribution distribution = new TDistribution(null, degreesOfFreedom); 1122 return 2.0 * distribution.cumulativeProbability(-t); 1123 1124 } 1125 1126 /** 1127 * Check significance level. 1128 * 1129 * @param alpha significance level 1130 * @throws OutOfRangeException if the significance level is out of bounds. 1131 */ 1132 private void checkSignificanceLevel(final double alpha) 1133 throws OutOfRangeException { 1134 1135 if (alpha <= 0 || alpha > 0.5) { 1136 throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL, 1137 alpha, 0.0, 0.5); 1138 } 1139 1140 } 1141 1142 /** 1143 * Check sample data. 1144 * 1145 * @param data Sample data. 1146 * @throws NullArgumentException if {@code data} is {@code null}. 1147 * @throws NumberIsTooSmallException if there is not enough sample data. 
1148 */ 1149 private void checkSampleData(final double[] data) 1150 throws NullArgumentException, NumberIsTooSmallException { 1151 1152 if (data == null) { 1153 throw new NullArgumentException(); 1154 } 1155 if (data.length < 2) { 1156 throw new NumberIsTooSmallException( 1157 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC, 1158 data.length, 2, true); 1159 } 1160 1161 } 1162 1163 /** 1164 * Check sample data. 1165 * 1166 * @param stat Statistical summary. 1167 * @throws NullArgumentException if {@code data} is {@code null}. 1168 * @throws NumberIsTooSmallException if there is not enough sample data. 1169 */ 1170 private void checkSampleData(final StatisticalSummary stat) 1171 throws NullArgumentException, NumberIsTooSmallException { 1172 1173 if (stat == null) { 1174 throw new NullArgumentException(); 1175 } 1176 if (stat.getN() < 2) { 1177 throw new NumberIsTooSmallException( 1178 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC, 1179 stat.getN(), 2, true); 1180 } 1181 1182 } 1183 1184}