1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat.inference;
18
19 import org.apache.commons.statistics.distribution.ChiSquaredDistribution;
20 import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
21 import org.apache.commons.math4.legacy.exception.MaxCountExceededException;
22 import org.apache.commons.math4.legacy.exception.NotPositiveException;
23 import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
24 import org.apache.commons.math4.legacy.exception.NullArgumentException;
25 import org.apache.commons.math4.legacy.exception.OutOfRangeException;
26 import org.apache.commons.math4.legacy.exception.ZeroException;
27 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
28 import org.apache.commons.math4.core.jdkmath.JdkMath;
29 import org.apache.commons.math4.legacy.core.MathArrays;
30
31 /**
32 * Implements Chi-Square test statistics.
33 *
34 * <p>This implementation handles both known and unknown distributions.</p>
35 *
36 * <p>Two samples tests can be used when the distribution is unknown <i>a priori</i>
37 * but provided by one sample, or when the hypothesis under test is that the two
38 * samples come from the same underlying distribution.</p>
39 *
40 */
41 public class ChiSquareTest {
42
43 /**
44 * Construct a ChiSquareTest.
45 */
46 public ChiSquareTest() {
47 super();
48 }
49
50 /**
51 * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
52 * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
53 * frequency counts.
54 * <p>
55 * This statistic can be used to perform a Chi-Square test evaluating the null
56 * hypothesis that the observed counts follow the expected distribution.</p>
57 * <p>
58 * <strong>Preconditions</strong>: <ul>
59 * <li>Expected counts must all be positive.
60 * </li>
61 * <li>Observed counts must all be ≥ 0.
62 * </li>
63 * <li>The observed and expected arrays must have the same length and
64 * their common length must be at least 2.
65 * </li></ul><p>
66 * If any of the preconditions are not met, an
67 * <code>IllegalArgumentException</code> is thrown.</p>
68 * <p><strong>Note: </strong>This implementation rescales the
69 * <code>expected</code> array if necessary to ensure that the sum of the
70 * expected and observed counts are equal.</p>
71 *
72 * @param observed array of observed frequency counts
73 * @param expected array of expected frequency counts
74 * @return chiSquare test statistic
75 * @throws NotPositiveException if <code>observed</code> has negative entries
76 * @throws NotStrictlyPositiveException if <code>expected</code> has entries that are
77 * not strictly positive
78 * @throws DimensionMismatchException if the arrays length is less than 2
79 */
80 public double chiSquare(final double[] expected, final long[] observed)
81 throws NotPositiveException, NotStrictlyPositiveException,
82 DimensionMismatchException {
83
84 if (expected.length < 2) {
85 throw new DimensionMismatchException(expected.length, 2);
86 }
87 if (expected.length != observed.length) {
88 throw new DimensionMismatchException(expected.length, observed.length);
89 }
90 MathArrays.checkPositive(expected);
91 MathArrays.checkNonNegative(observed);
92
93 double sumExpected = 0d;
94 double sumObserved = 0d;
95 for (int i = 0; i < observed.length; i++) {
96 sumExpected += expected[i];
97 sumObserved += observed[i];
98 }
99 double ratio = 1.0d;
100 boolean rescale = false;
101 if (JdkMath.abs(sumExpected - sumObserved) > 10E-6) {
102 ratio = sumObserved / sumExpected;
103 rescale = true;
104 }
105 double sumSq = 0.0d;
106 for (int i = 0; i < observed.length; i++) {
107 if (rescale) {
108 final double dev = observed[i] - ratio * expected[i];
109 sumSq += dev * dev / (ratio * expected[i]);
110 } else {
111 final double dev = observed[i] - expected[i];
112 sumSq += dev * dev / expected[i];
113 }
114 }
115 return sumSq;
116 }
117
118 /**
119 * Returns the <i>observed significance level</i>, or <a href=
120 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
121 * p-value</a>, associated with a
122 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
123 * Chi-square goodness of fit test</a> comparing the <code>observed</code>
124 * frequency counts to those in the <code>expected</code> array.
125 * <p>
126 * The number returned is the smallest significance level at which one can reject
127 * the null hypothesis that the observed counts conform to the frequency distribution
128 * described by the expected counts.</p>
129 * <p>
130 * <strong>Preconditions</strong>: <ul>
131 * <li>Expected counts must all be positive.
132 * </li>
133 * <li>Observed counts must all be ≥ 0.
134 * </li>
135 * <li>The observed and expected arrays must have the same length and
136 * their common length must be at least 2.
137 * </li></ul><p>
138 * If any of the preconditions are not met, an
139 * <code>IllegalArgumentException</code> is thrown.</p>
140 * <p><strong>Note: </strong>This implementation rescales the
141 * <code>expected</code> array if necessary to ensure that the sum of the
142 * expected and observed counts are equal.</p>
143 *
144 * @param observed array of observed frequency counts
145 * @param expected array of expected frequency counts
146 * @return p-value
147 * @throws NotPositiveException if <code>observed</code> has negative entries
148 * @throws NotStrictlyPositiveException if <code>expected</code> has entries that are
149 * not strictly positive
150 * @throws DimensionMismatchException if the arrays length is less than 2
151 * @throws MaxCountExceededException if an error occurs computing the p-value
152 */
153 public double chiSquareTest(final double[] expected, final long[] observed)
154 throws NotPositiveException, NotStrictlyPositiveException,
155 DimensionMismatchException, MaxCountExceededException {
156
157 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
158 final ChiSquaredDistribution distribution =
159 ChiSquaredDistribution.of(expected.length - 1.0);
160 return distribution.survivalProbability(chiSquare(expected, observed));
161 }
162
163 /**
164 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
165 * Chi-square goodness of fit test</a> evaluating the null hypothesis that the
166 * observed counts conform to the frequency distribution described by the expected
167 * counts, with significance level <code>alpha</code>. Returns true iff the null
168 * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
169 * <p>
170 * <strong>Example:</strong><br>
171 * To test the hypothesis that <code>observed</code> follows
172 * <code>expected</code> at the 99% level, use </p><p>
173 * <code>chiSquareTest(expected, observed, 0.01) </code></p>
174 * <p>
175 * <strong>Preconditions</strong>: <ul>
176 * <li>Expected counts must all be positive.
177 * </li>
178 * <li>Observed counts must all be ≥ 0.
179 * </li>
180 * <li>The observed and expected arrays must have the same length and
181 * their common length must be at least 2.
182 * <li> <code> 0 < alpha < 0.5 </code>
183 * </li></ul><p>
184 * If any of the preconditions are not met, an
185 * <code>IllegalArgumentException</code> is thrown.</p>
186 * <p><strong>Note: </strong>This implementation rescales the
187 * <code>expected</code> array if necessary to ensure that the sum of the
188 * expected and observed counts are equal.</p>
189 *
190 * @param observed array of observed frequency counts
191 * @param expected array of expected frequency counts
192 * @param alpha significance level of the test
193 * @return true iff null hypothesis can be rejected with confidence
194 * 1 - alpha
195 * @throws NotPositiveException if <code>observed</code> has negative entries
196 * @throws NotStrictlyPositiveException if <code>expected</code> has entries that are
197 * not strictly positive
198 * @throws DimensionMismatchException if the arrays length is less than 2
199 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
200 * @throws MaxCountExceededException if an error occurs computing the p-value
201 */
202 public boolean chiSquareTest(final double[] expected, final long[] observed,
203 final double alpha)
204 throws NotPositiveException, NotStrictlyPositiveException,
205 DimensionMismatchException, OutOfRangeException, MaxCountExceededException {
206
207 if (alpha <= 0 || alpha > 0.5) {
208 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
209 alpha, 0, 0.5);
210 }
211 return chiSquareTest(expected, observed) < alpha;
212 }
213
214 /**
215 * Computes the Chi-Square statistic associated with a
216 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
217 * chi-square test of independence</a> based on the input <code>counts</code>
218 * array, viewed as a two-way table.
219 * <p>
220 * The rows of the 2-way table are
221 * <code>count[0], ... , count[count.length - 1] </code></p>
222 * <p>
223 * <strong>Preconditions</strong>: <ul>
224 * <li>All counts must be ≥ 0.
225 * </li>
226 * <li>The sum of each row and column must be > 0.
227 * </li>
228 * <li>The count array must be rectangular (i.e. all count[i] subarrays
229 * must have the same length).
230 * </li>
231 * <li>The 2-way table represented by <code>counts</code> must have at
232 * least 2 columns and at least 2 rows.
233 * </li>
234 * </ul><p>
235 * If any of the preconditions are not met, an
236 * <code>IllegalArgumentException</code> is thrown.</p>
237 * <p>
238 * If a column or row contains only zeros this is invalid input and a
239 * <code>ZeroException</code> is thrown. The empty column/row should
240 * be removed from the input counts.</p>
241 *
242 * @param counts array representation of 2-way table
243 * @return chiSquare test statistic
244 * @throws NullArgumentException if the array is null
245 * @throws DimensionMismatchException if the array is not rectangular
246 * @throws NotPositiveException if {@code counts} has negative entries
247 * @throws ZeroException if the sum of a row or column is zero
248 */
249 public double chiSquare(final long[][] counts)
250 throws NullArgumentException, NotPositiveException,
251 DimensionMismatchException {
252
253 checkArray(counts);
254 int nRows = counts.length;
255 int nCols = counts[0].length;
256
257 // compute row, column and total sums
258 double[] rowSum = new double[nRows];
259 double[] colSum = new double[nCols];
260 double total = 0.0d;
261 for (int row = 0; row < nRows; row++) {
262 for (int col = 0; col < nCols; col++) {
263 rowSum[row] += counts[row][col];
264 colSum[col] += counts[row][col];
265 total += counts[row][col];
266 }
267 checkNonZero(rowSum[row], "row", row);
268 }
269
270 for (int col = 0; col < nCols; col++) {
271 checkNonZero(colSum[col], "column", col);
272 }
273
274 // compute expected counts and chi-square
275 double sumSq = 0.0d;
276 double expected = 0.0d;
277 for (int row = 0; row < nRows; row++) {
278 for (int col = 0; col < nCols; col++) {
279 expected = (rowSum[row] * colSum[col]) / total;
280 sumSq += ((counts[row][col] - expected) *
281 (counts[row][col] - expected)) / expected;
282 }
283 }
284 return sumSq;
285 }
286
287 /**
288 * Returns the <i>observed significance level</i>, or <a href=
289 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
290 * p-value</a>, associated with a
291 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
292 * chi-square test of independence</a> based on the input <code>counts</code>
293 * array, viewed as a two-way table.
294 * <p>
295 * The rows of the 2-way table are
296 * <code>count[0], ... , count[count.length - 1] </code></p>
297 * <p>
298 * <strong>Preconditions</strong>: <ul>
299 * <li>All counts must be ≥ 0.
300 * </li>
301 * <li>The sum of each row and column must be > 0.
302 * </li>
303 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have
304 * the same length).
305 * </li>
306 * <li>The 2-way table represented by <code>counts</code> must have at least 2
307 * columns and at least 2 rows.
308 * </li>
309 * </ul><p>
310 * If any of the preconditions are not met, an
311 * <code>IllegalArgumentException</code> is thrown.</p>
312 * <p>
313 * If a column or row contains only zeros this is invalid input and a
314 * <code>ZeroException</code> is thrown. The empty column/row should
315 * be removed from the input counts.</p>
316 *
317 * @param counts array representation of 2-way table
318 * @return p-value
319 * @throws NullArgumentException if the array is null
320 * @throws DimensionMismatchException if the array is not rectangular
321 * @throws NotPositiveException if {@code counts} has negative entries
322 * @throws MaxCountExceededException if an error occurs computing the p-value
323 * @throws ZeroException if the sum of a row or column is zero
324 */
325 public double chiSquareTest(final long[][] counts)
326 throws NullArgumentException, DimensionMismatchException,
327 NotPositiveException, MaxCountExceededException {
328
329 checkArray(counts);
330 double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
331 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
332 final ChiSquaredDistribution distribution = ChiSquaredDistribution.of(df);
333 return distribution.survivalProbability(chiSquare(counts));
334 }
335
336 /**
337 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
338 * chi-square test of independence</a> evaluating the null hypothesis that the
339 * classifications represented by the counts in the columns of the input 2-way table
340 * are independent of the rows, with significance level <code>alpha</code>.
341 * Returns true iff the null hypothesis can be rejected with 100 * (1 - alpha) percent
342 * confidence.
343 * <p>
344 * The rows of the 2-way table are
345 * <code>count[0], ... , count[count.length - 1] </code></p>
346 * <p>
347 * <strong>Example:</strong><br>
348 * To test the null hypothesis that the counts in
349 * <code>count[0], ... , count[count.length - 1] </code>
350 * all correspond to the same underlying probability distribution at the 99% level, use</p>
351 * <p><code>chiSquareTest(counts, 0.01)</code></p>
352 * <p>
353 * <strong>Preconditions</strong>: <ul>
354 * <li>All counts must be ≥ 0.
355 * </li>
356 * <li>The sum of each row and column must be > 0.
357 * </li>
358 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the
359 * same length).</li>
360 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
361 * at least 2 rows.</li>
362 * </ul><p>
363 * If any of the preconditions are not met, an
364 * <code>IllegalArgumentException</code> is thrown.</p>
365 * <p>
366 * If a column or row contains only zeros this is invalid input and a
367 * <code>ZeroException</code> is thrown. The empty column/row should
368 * be removed from the input counts.</p>
369 *
370 * @param counts array representation of 2-way table
371 * @param alpha significance level of the test
372 * @return true iff null hypothesis can be rejected with confidence
373 * 1 - alpha
374 * @throws NullArgumentException if the array is null
375 * @throws DimensionMismatchException if the array is not rectangular
376 * @throws NotPositiveException if {@code counts} has any negative entries
377 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
378 * @throws MaxCountExceededException if an error occurs computing the p-value
379 * @throws ZeroException if the sum of a row or column is zero
380 */
381 public boolean chiSquareTest(final long[][] counts, final double alpha)
382 throws NullArgumentException, DimensionMismatchException,
383 NotPositiveException, OutOfRangeException, MaxCountExceededException {
384
385 if (alpha <= 0 || alpha > 0.5) {
386 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
387 alpha, 0, 0.5);
388 }
389 return chiSquareTest(counts) < alpha;
390 }
391
392 /**
393 * <p>Computes a
394 * <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
395 * Chi-Square two sample test statistic</a> comparing bin frequency counts
396 * in <code>observed1</code> and <code>observed2</code>. The
397 * sums of frequency counts in the two samples are not required to be the
398 * same. The formula used to compute the test statistic is</p>
399 * <code>
400 * ∑[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
401 * </code> where
402 * <br><code>K = √[∑(observed2 / ∑(observed1)]</code>
403 *
404 * <p>This statistic can be used to perform a Chi-Square test evaluating the
405 * null hypothesis that both observed counts follow the same distribution.</p>
406 * <p>
407 * <strong>Preconditions</strong>: <ul>
408 * <li>Observed counts must be non-negative.
409 * </li>
410 * <li>Observed counts for a specific bin must not both be zero.
411 * </li>
412 * <li>Observed counts for a specific sample must not all be 0.
413 * </li>
414 * <li>The arrays <code>observed1</code> and <code>observed2</code> must have
415 * the same length and their common length must be at least 2.
416 * </li></ul><p>
417 * If any of the preconditions are not met, an
418 * <code>IllegalArgumentException</code> is thrown.</p>
419 *
420 * @param observed1 array of observed frequency counts of the first data set
421 * @param observed2 array of observed frequency counts of the second data set
422 * @return chiSquare test statistic
423 * @throws DimensionMismatchException the length of the arrays does not match
424 * @throws NotPositiveException if any entries in <code>observed1</code> or
425 * <code>observed2</code> are negative
426 * @throws ZeroException if either all counts of <code>observed1</code> or
427 * <code>observed2</code> are zero, or if the count at some index is zero
428 * for both arrays
429 * @since 1.2
430 */
431 public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
432 throws DimensionMismatchException, NotPositiveException, ZeroException {
433
434 // Make sure lengths are same
435 if (observed1.length < 2) {
436 throw new DimensionMismatchException(observed1.length, 2);
437 }
438 if (observed1.length != observed2.length) {
439 throw new DimensionMismatchException(observed1.length, observed2.length);
440 }
441
442 // Ensure non-negative counts
443 MathArrays.checkNonNegative(observed1);
444 MathArrays.checkNonNegative(observed2);
445
446 // Compute and compare count sums
447 long countSum1 = 0;
448 long countSum2 = 0;
449 boolean unequalCounts = false;
450 double weight = 0.0;
451 for (int i = 0; i < observed1.length; i++) {
452 countSum1 += observed1[i];
453 countSum2 += observed2[i];
454 }
455 // Ensure neither sample is uniformly 0
456 if (countSum1 == 0 || countSum2 == 0) {
457 throw new ZeroException();
458 }
459 // Compare and compute weight only if different
460 unequalCounts = countSum1 != countSum2;
461 if (unequalCounts) {
462 weight = JdkMath.sqrt((double) countSum1 / (double) countSum2);
463 }
464 // Compute ChiSquare statistic
465 double sumSq = 0.0d;
466 double dev = 0.0d;
467 double obs1 = 0.0d;
468 double obs2 = 0.0d;
469 for (int i = 0; i < observed1.length; i++) {
470 if (observed1[i] == 0 && observed2[i] == 0) {
471 throw new ZeroException(LocalizedFormats.OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY, i);
472 } else {
473 obs1 = observed1[i];
474 obs2 = observed2[i];
475 if (unequalCounts) { // apply weights
476 dev = obs1/weight - obs2 * weight;
477 } else {
478 dev = obs1 - obs2;
479 }
480 sumSq += (dev * dev) / (obs1 + obs2);
481 }
482 }
483 return sumSq;
484 }
485
486 /**
487 * <p>Returns the <i>observed significance level</i>, or <a href=
488 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
489 * p-value</a>, associated with a Chi-Square two sample test comparing
490 * bin frequency counts in <code>observed1</code> and
491 * <code>observed2</code>.
492 * </p>
493 * <p>The number returned is the smallest significance level at which one
494 * can reject the null hypothesis that the observed counts conform to the
495 * same distribution.
496 * </p>
497 * <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for details
498 * on the formula used to compute the test statistic. The degrees of
499 * of freedom used to perform the test is one less than the common length
500 * of the input observed count arrays.
501 * </p>
502 * <strong>Preconditions</strong>: <ul>
503 * <li>Observed counts must be non-negative.
504 * </li>
505 * <li>Observed counts for a specific bin must not both be zero.
506 * </li>
507 * <li>Observed counts for a specific sample must not all be 0.
508 * </li>
509 * <li>The arrays <code>observed1</code> and <code>observed2</code> must
510 * have the same length and
511 * their common length must be at least 2.
512 * </li></ul><p>
513 * If any of the preconditions are not met, an
514 * <code>IllegalArgumentException</code> is thrown.</p>
515 *
516 * @param observed1 array of observed frequency counts of the first data set
517 * @param observed2 array of observed frequency counts of the second data set
518 * @return p-value
519 * @throws DimensionMismatchException the length of the arrays does not match
520 * @throws NotPositiveException if any entries in <code>observed1</code> or
521 * <code>observed2</code> are negative
522 * @throws ZeroException if either all counts of <code>observed1</code> or
523 * <code>observed2</code> are zero, or if the count at the same index is zero
524 * for both arrays
525 * @throws MaxCountExceededException if an error occurs computing the p-value
526 * @since 1.2
527 */
528 public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
529 throws DimensionMismatchException, NotPositiveException, ZeroException,
530 MaxCountExceededException {
531
532 // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
533 final ChiSquaredDistribution distribution =
534 ChiSquaredDistribution.of((double) observed1.length - 1);
535 return distribution.survivalProbability(
536 chiSquareDataSetsComparison(observed1, observed2));
537 }
538
539 /**
540 * <p>Performs a Chi-Square two sample test comparing two binned data
541 * sets. The test evaluates the null hypothesis that the two lists of
542 * observed counts conform to the same frequency distribution, with
543 * significance level <code>alpha</code>. Returns true iff the null
544 * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
545 * </p>
546 * <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for
547 * details on the formula used to compute the Chisquare statistic used
548 * in the test. The degrees of of freedom used to perform the test is
549 * one less than the common length of the input observed count arrays.
550 * </p>
551 * <strong>Preconditions</strong>: <ul>
552 * <li>Observed counts must be non-negative.
553 * </li>
554 * <li>Observed counts for a specific bin must not both be zero.
555 * </li>
556 * <li>Observed counts for a specific sample must not all be 0.
557 * </li>
558 * <li>The arrays <code>observed1</code> and <code>observed2</code> must
559 * have the same length and their common length must be at least 2.
560 * </li>
561 * <li> <code> 0 < alpha < 0.5 </code>
562 * </li></ul><p>
563 * If any of the preconditions are not met, an
564 * <code>IllegalArgumentException</code> is thrown.</p>
565 *
566 * @param observed1 array of observed frequency counts of the first data set
567 * @param observed2 array of observed frequency counts of the second data set
568 * @param alpha significance level of the test
569 * @return true iff null hypothesis can be rejected with confidence
570 * 1 - alpha
571 * @throws DimensionMismatchException the length of the arrays does not match
572 * @throws NotPositiveException if any entries in <code>observed1</code> or
573 * <code>observed2</code> are negative
574 * @throws ZeroException if either all counts of <code>observed1</code> or
575 * <code>observed2</code> are zero, or if the count at the same index is zero
576 * for both arrays
577 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
578 * @throws MaxCountExceededException if an error occurs performing the test
579 * @since 1.2
580 */
581 public boolean chiSquareTestDataSetsComparison(final long[] observed1,
582 final long[] observed2,
583 final double alpha)
584 throws DimensionMismatchException, NotPositiveException,
585 ZeroException, OutOfRangeException, MaxCountExceededException {
586
587 if (alpha <= 0 ||
588 alpha > 0.5) {
589 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
590 alpha, 0, 0.5);
591 }
592 return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
593 }
594
595 /**
596 * Checks to make sure that the input long[][] array is rectangular,
597 * has at least 2 rows and 2 columns, and has all non-negative entries.
598 *
599 * @param in input 2-way table to check
600 * @throws NullArgumentException if the array is null
601 * @throws DimensionMismatchException if the array is not valid
602 * @throws NotPositiveException if the array contains any negative entries
603 */
604 private void checkArray(final long[][] in)
605 throws NullArgumentException, DimensionMismatchException,
606 NotPositiveException {
607
608 if (in.length < 2) {
609 throw new DimensionMismatchException(in.length, 2);
610 }
611
612 if (in[0].length < 2) {
613 throw new DimensionMismatchException(in[0].length, 2);
614 }
615
616 MathArrays.checkRectangular(in);
617 MathArrays.checkNonNegative(in);
618 }
619
620 /**
621 * Check the array value is non-zero.
622 *
623 * @param value Value
624 * @param name Name of the array
625 * @param index Index in the array
626 * @throws ZeroException if the value is zero
627 */
628 private static void checkNonZero(double value, String name, int index) {
629 if (value == 0) {
630 throw new ZeroException(LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO,
631 name + " " + index);
632 }
633 }
634 }