View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.inference;
18  
19  import org.apache.commons.statistics.distribution.ChiSquaredDistribution;
20  import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
21  import org.apache.commons.math4.legacy.exception.MaxCountExceededException;
22  import org.apache.commons.math4.legacy.exception.NotPositiveException;
23  import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
24  import org.apache.commons.math4.legacy.exception.NullArgumentException;
25  import org.apache.commons.math4.legacy.exception.OutOfRangeException;
26  import org.apache.commons.math4.legacy.exception.ZeroException;
27  import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
28  import org.apache.commons.math4.core.jdkmath.JdkMath;
29  import org.apache.commons.math4.legacy.core.MathArrays;
30  
31  /**
32   * Implements Chi-Square test statistics.
33   *
34   * <p>This implementation handles both known and unknown distributions.</p>
35   *
36   * <p>Two samples tests can be used when the distribution is unknown <i>a priori</i>
37   * but provided by one sample, or when the hypothesis under test is that the two
38   * samples come from the same underlying distribution.</p>
39   *
40   */
41  public class ChiSquareTest {
42  
43      /**
44       * Construct a ChiSquareTest.
45       */
46      public ChiSquareTest() {
47          super();
48      }
49  
50      /**
51       * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
52       * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
53       * frequency counts.
54       * <p>
55       * This statistic can be used to perform a Chi-Square test evaluating the null
56       * hypothesis that the observed counts follow the expected distribution.</p>
57       * <p>
58       * <strong>Preconditions</strong>: <ul>
59       * <li>Expected counts must all be positive.
60       * </li>
61       * <li>Observed counts must all be &ge; 0.
62       * </li>
63       * <li>The observed and expected arrays must have the same length and
64       * their common length must be at least 2.
65       * </li></ul><p>
66       * If any of the preconditions are not met, an
67       * <code>IllegalArgumentException</code> is thrown.</p>
68       * <p><strong>Note: </strong>This implementation rescales the
69       * <code>expected</code> array if necessary to ensure that the sum of the
70       * expected and observed counts are equal.</p>
71       *
72       * @param observed array of observed frequency counts
73       * @param expected array of expected frequency counts
74       * @return chiSquare test statistic
75       * @throws NotPositiveException if <code>observed</code> has negative entries
76       * @throws NotStrictlyPositiveException if <code>expected</code> has entries that are
77       * not strictly positive
78       * @throws DimensionMismatchException if the arrays length is less than 2
79       */
80      public double chiSquare(final double[] expected, final long[] observed)
81          throws NotPositiveException, NotStrictlyPositiveException,
82          DimensionMismatchException {
83  
84          if (expected.length < 2) {
85              throw new DimensionMismatchException(expected.length, 2);
86          }
87          if (expected.length != observed.length) {
88              throw new DimensionMismatchException(expected.length, observed.length);
89          }
90          MathArrays.checkPositive(expected);
91          MathArrays.checkNonNegative(observed);
92  
93          double sumExpected = 0d;
94          double sumObserved = 0d;
95          for (int i = 0; i < observed.length; i++) {
96              sumExpected += expected[i];
97              sumObserved += observed[i];
98          }
99          double ratio = 1.0d;
100         boolean rescale = false;
101         if (JdkMath.abs(sumExpected - sumObserved) > 10E-6) {
102             ratio = sumObserved / sumExpected;
103             rescale = true;
104         }
105         double sumSq = 0.0d;
106         for (int i = 0; i < observed.length; i++) {
107             if (rescale) {
108                 final double dev = observed[i] - ratio * expected[i];
109                 sumSq += dev * dev / (ratio * expected[i]);
110             } else {
111                 final double dev = observed[i] - expected[i];
112                 sumSq += dev * dev / expected[i];
113             }
114         }
115         return sumSq;
116     }
117 
118     /**
119      * Returns the <i>observed significance level</i>, or <a href=
120      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
121      * p-value</a>, associated with a
122      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
123      * Chi-square goodness of fit test</a> comparing the <code>observed</code>
124      * frequency counts to those in the <code>expected</code> array.
125      * <p>
126      * The number returned is the smallest significance level at which one can reject
127      * the null hypothesis that the observed counts conform to the frequency distribution
128      * described by the expected counts.</p>
129      * <p>
130      * <strong>Preconditions</strong>: <ul>
131      * <li>Expected counts must all be positive.
132      * </li>
133      * <li>Observed counts must all be &ge; 0.
134      * </li>
135      * <li>The observed and expected arrays must have the same length and
136      * their common length must be at least 2.
137      * </li></ul><p>
138      * If any of the preconditions are not met, an
139      * <code>IllegalArgumentException</code> is thrown.</p>
140      * <p><strong>Note: </strong>This implementation rescales the
141      * <code>expected</code> array if necessary to ensure that the sum of the
142      * expected and observed counts are equal.</p>
143      *
144      * @param observed array of observed frequency counts
145      * @param expected array of expected frequency counts
146      * @return p-value
147      * @throws NotPositiveException if <code>observed</code> has negative entries
148      * @throws NotStrictlyPositiveException if <code>expected</code> has entries that are
149      * not strictly positive
150      * @throws DimensionMismatchException if the arrays length is less than 2
151      * @throws MaxCountExceededException if an error occurs computing the p-value
152      */
153     public double chiSquareTest(final double[] expected, final long[] observed)
154         throws NotPositiveException, NotStrictlyPositiveException,
155         DimensionMismatchException, MaxCountExceededException {
156 
157         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
158         final ChiSquaredDistribution distribution =
159             ChiSquaredDistribution.of(expected.length - 1.0);
160         return distribution.survivalProbability(chiSquare(expected, observed));
161     }
162 
163     /**
164      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
165      * Chi-square goodness of fit test</a> evaluating the null hypothesis that the
166      * observed counts conform to the frequency distribution described by the expected
167      * counts, with significance level <code>alpha</code>.  Returns true iff the null
168      * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
169      * <p>
170      * <strong>Example:</strong><br>
171      * To test the hypothesis that <code>observed</code> follows
172      * <code>expected</code> at the 99% level, use </p><p>
173      * <code>chiSquareTest(expected, observed, 0.01) </code></p>
174      * <p>
175      * <strong>Preconditions</strong>: <ul>
176      * <li>Expected counts must all be positive.
177      * </li>
178      * <li>Observed counts must all be &ge; 0.
179      * </li>
180      * <li>The observed and expected arrays must have the same length and
181      * their common length must be at least 2.
182      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
183      * </li></ul><p>
184      * If any of the preconditions are not met, an
185      * <code>IllegalArgumentException</code> is thrown.</p>
186      * <p><strong>Note: </strong>This implementation rescales the
187      * <code>expected</code> array if necessary to ensure that the sum of the
188      * expected and observed counts are equal.</p>
189      *
190      * @param observed array of observed frequency counts
191      * @param expected array of expected frequency counts
192      * @param alpha significance level of the test
193      * @return true iff null hypothesis can be rejected with confidence
194      * 1 - alpha
195      * @throws NotPositiveException if <code>observed</code> has negative entries
196      * @throws NotStrictlyPositiveException if <code>expected</code> has entries that are
197      * not strictly positive
198      * @throws DimensionMismatchException if the arrays length is less than 2
199      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
200      * @throws MaxCountExceededException if an error occurs computing the p-value
201      */
202     public boolean chiSquareTest(final double[] expected, final long[] observed,
203                                  final double alpha)
204         throws NotPositiveException, NotStrictlyPositiveException,
205         DimensionMismatchException, OutOfRangeException, MaxCountExceededException {
206 
207         if (alpha <= 0 || alpha > 0.5) {
208             throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
209                                           alpha, 0, 0.5);
210         }
211         return chiSquareTest(expected, observed) < alpha;
212     }
213 
214     /**
215      *  Computes the Chi-Square statistic associated with a
216      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
217      *  chi-square test of independence</a> based on the input <code>counts</code>
218      *  array, viewed as a two-way table.
219      * <p>
220      * The rows of the 2-way table are
221      * <code>count[0], ... , count[count.length - 1] </code></p>
222      * <p>
223      * <strong>Preconditions</strong>: <ul>
224      * <li>All counts must be &ge; 0.
225      * </li>
226      * <li>The sum of each row and column must be &gt; 0.
227      * </li>
228      * <li>The count array must be rectangular (i.e. all count[i] subarrays
229      *  must have the same length).
230      * </li>
231      * <li>The 2-way table represented by <code>counts</code> must have at
232      *  least 2 columns and at least 2 rows.
233      * </li>
234      * </ul><p>
235      * If any of the preconditions are not met, an
236      * <code>IllegalArgumentException</code> is thrown.</p>
237      * <p>
238      * If a column or row contains only zeros this is invalid input and a
239      * <code>ZeroException</code> is thrown. The empty column/row should
240      * be removed from the input counts.</p>
241      *
242      * @param counts array representation of 2-way table
243      * @return chiSquare test statistic
244      * @throws NullArgumentException if the array is null
245      * @throws DimensionMismatchException if the array is not rectangular
246      * @throws NotPositiveException if {@code counts} has negative entries
247      * @throws ZeroException if the sum of a row or column is zero
248      */
249     public double chiSquare(final long[][] counts)
250         throws NullArgumentException, NotPositiveException,
251         DimensionMismatchException {
252 
253         checkArray(counts);
254         int nRows = counts.length;
255         int nCols = counts[0].length;
256 
257         // compute row, column and total sums
258         double[] rowSum = new double[nRows];
259         double[] colSum = new double[nCols];
260         double total = 0.0d;
261         for (int row = 0; row < nRows; row++) {
262             for (int col = 0; col < nCols; col++) {
263                 rowSum[row] += counts[row][col];
264                 colSum[col] += counts[row][col];
265                 total += counts[row][col];
266             }
267             checkNonZero(rowSum[row], "row", row);
268         }
269 
270         for (int col = 0; col < nCols; col++) {
271             checkNonZero(colSum[col], "column", col);
272         }
273 
274         // compute expected counts and chi-square
275         double sumSq = 0.0d;
276         double expected = 0.0d;
277         for (int row = 0; row < nRows; row++) {
278             for (int col = 0; col < nCols; col++) {
279                 expected = (rowSum[row] * colSum[col]) / total;
280                 sumSq += ((counts[row][col] - expected) *
281                         (counts[row][col] - expected)) / expected;
282             }
283         }
284         return sumSq;
285     }
286 
287     /**
288      * Returns the <i>observed significance level</i>, or <a href=
289      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
290      * p-value</a>, associated with a
291      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
292      * chi-square test of independence</a> based on the input <code>counts</code>
293      * array, viewed as a two-way table.
294      * <p>
295      * The rows of the 2-way table are
296      * <code>count[0], ... , count[count.length - 1] </code></p>
297      * <p>
298      * <strong>Preconditions</strong>: <ul>
299      * <li>All counts must be &ge; 0.
300      * </li>
301      * <li>The sum of each row and column must be &gt; 0.
302      * </li>
303      * <li>The count array must be rectangular (i.e. all count[i] subarrays must have
304      *     the same length).
305      * </li>
306      * <li>The 2-way table represented by <code>counts</code> must have at least 2
307      *     columns and at least 2 rows.
308      * </li>
309      * </ul><p>
310      * If any of the preconditions are not met, an
311      * <code>IllegalArgumentException</code> is thrown.</p>
312      * <p>
313      * If a column or row contains only zeros this is invalid input and a
314      * <code>ZeroException</code> is thrown. The empty column/row should
315      * be removed from the input counts.</p>
316      *
317      * @param counts array representation of 2-way table
318      * @return p-value
319      * @throws NullArgumentException if the array is null
320      * @throws DimensionMismatchException if the array is not rectangular
321      * @throws NotPositiveException if {@code counts} has negative entries
322      * @throws MaxCountExceededException if an error occurs computing the p-value
323      * @throws ZeroException if the sum of a row or column is zero
324      */
325     public double chiSquareTest(final long[][] counts)
326         throws NullArgumentException, DimensionMismatchException,
327         NotPositiveException, MaxCountExceededException {
328 
329         checkArray(counts);
330         double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
331         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
332         final ChiSquaredDistribution distribution = ChiSquaredDistribution.of(df);
333         return distribution.survivalProbability(chiSquare(counts));
334     }
335 
336     /**
337      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
338      * chi-square test of independence</a> evaluating the null hypothesis that the
339      * classifications represented by the counts in the columns of the input 2-way table
340      * are independent of the rows, with significance level <code>alpha</code>.
341      * Returns true iff the null hypothesis can be rejected with 100 * (1 - alpha) percent
342      * confidence.
343      * <p>
344      * The rows of the 2-way table are
345      * <code>count[0], ... , count[count.length - 1] </code></p>
346      * <p>
347      * <strong>Example:</strong><br>
348      * To test the null hypothesis that the counts in
349      * <code>count[0], ... , count[count.length - 1] </code>
350      *  all correspond to the same underlying probability distribution at the 99% level, use</p>
351      * <p><code>chiSquareTest(counts, 0.01)</code></p>
352      * <p>
353      * <strong>Preconditions</strong>: <ul>
354      * <li>All counts must be &ge; 0.
355      * </li>
356      * <li>The sum of each row and column must be &gt; 0.
357      * </li>
358      * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the
359      *     same length).</li>
360      * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
361      *     at least 2 rows.</li>
362      * </ul><p>
363      * If any of the preconditions are not met, an
364      * <code>IllegalArgumentException</code> is thrown.</p>
365      * <p>
366      * If a column or row contains only zeros this is invalid input and a
367      * <code>ZeroException</code> is thrown. The empty column/row should
368      * be removed from the input counts.</p>
369      *
370      * @param counts array representation of 2-way table
371      * @param alpha significance level of the test
372      * @return true iff null hypothesis can be rejected with confidence
373      * 1 - alpha
374      * @throws NullArgumentException if the array is null
375      * @throws DimensionMismatchException if the array is not rectangular
376      * @throws NotPositiveException if {@code counts} has any negative entries
377      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
378      * @throws MaxCountExceededException if an error occurs computing the p-value
379      * @throws ZeroException if the sum of a row or column is zero
380      */
381     public boolean chiSquareTest(final long[][] counts, final double alpha)
382         throws NullArgumentException, DimensionMismatchException,
383         NotPositiveException, OutOfRangeException, MaxCountExceededException {
384 
385         if (alpha <= 0 || alpha > 0.5) {
386             throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
387                                           alpha, 0, 0.5);
388         }
389         return chiSquareTest(counts) < alpha;
390     }
391 
392     /**
393      * <p>Computes a
394      * <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
395      * Chi-Square two sample test statistic</a> comparing bin frequency counts
396      * in <code>observed1</code> and <code>observed2</code>.  The
397      * sums of frequency counts in the two samples are not required to be the
398      * same.  The formula used to compute the test statistic is</p>
399      * <code>
400      * &sum;[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
401      * </code> where
402      * <br><code>K = &radic;[&sum;(observed2 / &sum;(observed1)]</code>
403      *
404      * <p>This statistic can be used to perform a Chi-Square test evaluating the
405      * null hypothesis that both observed counts follow the same distribution.</p>
406      * <p>
407      * <strong>Preconditions</strong>: <ul>
408      * <li>Observed counts must be non-negative.
409      * </li>
410      * <li>Observed counts for a specific bin must not both be zero.
411      * </li>
412      * <li>Observed counts for a specific sample must not all be 0.
413      * </li>
414      * <li>The arrays <code>observed1</code> and <code>observed2</code> must have
415      * the same length and their common length must be at least 2.
416      * </li></ul><p>
417      * If any of the preconditions are not met, an
418      * <code>IllegalArgumentException</code> is thrown.</p>
419      *
420      * @param observed1 array of observed frequency counts of the first data set
421      * @param observed2 array of observed frequency counts of the second data set
422      * @return chiSquare test statistic
423      * @throws DimensionMismatchException the length of the arrays does not match
424      * @throws NotPositiveException if any entries in <code>observed1</code> or
425      * <code>observed2</code> are negative
426      * @throws ZeroException if either all counts of <code>observed1</code> or
427      * <code>observed2</code> are zero, or if the count at some index is zero
428      * for both arrays
429      * @since 1.2
430      */
431     public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
432         throws DimensionMismatchException, NotPositiveException, ZeroException {
433 
434         // Make sure lengths are same
435         if (observed1.length < 2) {
436             throw new DimensionMismatchException(observed1.length, 2);
437         }
438         if (observed1.length != observed2.length) {
439             throw new DimensionMismatchException(observed1.length, observed2.length);
440         }
441 
442         // Ensure non-negative counts
443         MathArrays.checkNonNegative(observed1);
444         MathArrays.checkNonNegative(observed2);
445 
446         // Compute and compare count sums
447         long countSum1 = 0;
448         long countSum2 = 0;
449         boolean unequalCounts = false;
450         double weight = 0.0;
451         for (int i = 0; i < observed1.length; i++) {
452             countSum1 += observed1[i];
453             countSum2 += observed2[i];
454         }
455         // Ensure neither sample is uniformly 0
456         if (countSum1 == 0 || countSum2 == 0) {
457             throw new ZeroException();
458         }
459         // Compare and compute weight only if different
460         unequalCounts = countSum1 != countSum2;
461         if (unequalCounts) {
462             weight = JdkMath.sqrt((double) countSum1 / (double) countSum2);
463         }
464         // Compute ChiSquare statistic
465         double sumSq = 0.0d;
466         double dev = 0.0d;
467         double obs1 = 0.0d;
468         double obs2 = 0.0d;
469         for (int i = 0; i < observed1.length; i++) {
470             if (observed1[i] == 0 && observed2[i] == 0) {
471                 throw new ZeroException(LocalizedFormats.OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY, i);
472             } else {
473                 obs1 = observed1[i];
474                 obs2 = observed2[i];
475                 if (unequalCounts) { // apply weights
476                     dev = obs1/weight - obs2 * weight;
477                 } else {
478                     dev = obs1 - obs2;
479                 }
480                 sumSq += (dev * dev) / (obs1 + obs2);
481             }
482         }
483         return sumSq;
484     }
485 
486     /**
487      * <p>Returns the <i>observed significance level</i>, or <a href=
488      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
489      * p-value</a>, associated with a Chi-Square two sample test comparing
490      * bin frequency counts in <code>observed1</code> and
491      * <code>observed2</code>.
492      * </p>
493      * <p>The number returned is the smallest significance level at which one
494      * can reject the null hypothesis that the observed counts conform to the
495      * same distribution.
496      * </p>
497      * <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for details
498      * on the formula used to compute the test statistic. The degrees of
499      * of freedom used to perform the test is one less than the common length
500      * of the input observed count arrays.
501      * </p>
502      * <strong>Preconditions</strong>: <ul>
503      * <li>Observed counts must be non-negative.
504      * </li>
505      * <li>Observed counts for a specific bin must not both be zero.
506      * </li>
507      * <li>Observed counts for a specific sample must not all be 0.
508      * </li>
509      * <li>The arrays <code>observed1</code> and <code>observed2</code> must
510      * have the same length and
511      * their common length must be at least 2.
512      * </li></ul><p>
513      * If any of the preconditions are not met, an
514      * <code>IllegalArgumentException</code> is thrown.</p>
515      *
516      * @param observed1 array of observed frequency counts of the first data set
517      * @param observed2 array of observed frequency counts of the second data set
518      * @return p-value
519      * @throws DimensionMismatchException the length of the arrays does not match
520      * @throws NotPositiveException if any entries in <code>observed1</code> or
521      * <code>observed2</code> are negative
522      * @throws ZeroException if either all counts of <code>observed1</code> or
523      * <code>observed2</code> are zero, or if the count at the same index is zero
524      * for both arrays
525      * @throws MaxCountExceededException if an error occurs computing the p-value
526      * @since 1.2
527      */
528     public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
529         throws DimensionMismatchException, NotPositiveException, ZeroException,
530         MaxCountExceededException {
531 
532         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
533         final ChiSquaredDistribution distribution =
534                 ChiSquaredDistribution.of((double) observed1.length - 1);
535         return distribution.survivalProbability(
536                 chiSquareDataSetsComparison(observed1, observed2));
537     }
538 
539     /**
540      * <p>Performs a Chi-Square two sample test comparing two binned data
541      * sets. The test evaluates the null hypothesis that the two lists of
542      * observed counts conform to the same frequency distribution, with
543      * significance level <code>alpha</code>.  Returns true iff the null
544      * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
545      * </p>
546      * <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for
547      * details on the formula used to compute the Chisquare statistic used
548      * in the test. The degrees of of freedom used to perform the test is
549      * one less than the common length of the input observed count arrays.
550      * </p>
551      * <strong>Preconditions</strong>: <ul>
552      * <li>Observed counts must be non-negative.
553      * </li>
554      * <li>Observed counts for a specific bin must not both be zero.
555      * </li>
556      * <li>Observed counts for a specific sample must not all be 0.
557      * </li>
558      * <li>The arrays <code>observed1</code> and <code>observed2</code> must
559      * have the same length and their common length must be at least 2.
560      * </li>
561      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
562      * </li></ul><p>
563      * If any of the preconditions are not met, an
564      * <code>IllegalArgumentException</code> is thrown.</p>
565      *
566      * @param observed1 array of observed frequency counts of the first data set
567      * @param observed2 array of observed frequency counts of the second data set
568      * @param alpha significance level of the test
569      * @return true iff null hypothesis can be rejected with confidence
570      * 1 - alpha
571      * @throws DimensionMismatchException the length of the arrays does not match
572      * @throws NotPositiveException if any entries in <code>observed1</code> or
573      * <code>observed2</code> are negative
574      * @throws ZeroException if either all counts of <code>observed1</code> or
575      * <code>observed2</code> are zero, or if the count at the same index is zero
576      * for both arrays
577      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
578      * @throws MaxCountExceededException if an error occurs performing the test
579      * @since 1.2
580      */
581     public boolean chiSquareTestDataSetsComparison(final long[] observed1,
582                                                    final long[] observed2,
583                                                    final double alpha)
584         throws DimensionMismatchException, NotPositiveException,
585         ZeroException, OutOfRangeException, MaxCountExceededException {
586 
587         if (alpha <= 0 ||
588             alpha > 0.5) {
589             throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
590                                           alpha, 0, 0.5);
591         }
592         return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
593     }
594 
595     /**
596      * Checks to make sure that the input long[][] array is rectangular,
597      * has at least 2 rows and 2 columns, and has all non-negative entries.
598      *
599      * @param in input 2-way table to check
600      * @throws NullArgumentException if the array is null
601      * @throws DimensionMismatchException if the array is not valid
602      * @throws NotPositiveException if the array contains any negative entries
603      */
604     private void checkArray(final long[][] in)
605         throws NullArgumentException, DimensionMismatchException,
606         NotPositiveException {
607 
608         if (in.length < 2) {
609             throw new DimensionMismatchException(in.length, 2);
610         }
611 
612         if (in[0].length < 2) {
613             throw new DimensionMismatchException(in[0].length, 2);
614         }
615 
616         MathArrays.checkRectangular(in);
617         MathArrays.checkNonNegative(in);
618     }
619 
620     /**
621      * Check the array value is non-zero.
622      *
623      * @param value Value
624      * @param name Name of the array
625      * @param index Index in the array
626      * @throws ZeroException if the value is zero
627      */
628     private static void checkNonZero(double value, String name, int index) {
629         if (value == 0) {
630             throw new ZeroException(LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO,
631                 name + " " + index);
632         }
633     }
634 }