001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.inference;
018    
019    import org.apache.commons.math.MathException;
020    import org.apache.commons.math.exception.NullArgumentException;
021    import org.apache.commons.math.exception.OutOfRangeException;
022    import org.apache.commons.math.exception.DimensionMismatchException;
023    import org.apache.commons.math.exception.MathIllegalArgumentException;
024    import org.apache.commons.math.distribution.ChiSquaredDistribution;
025    import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
026    import org.apache.commons.math.exception.util.LocalizedFormats;
027    import org.apache.commons.math.util.FastMath;
028    import org.apache.commons.math.util.MathUtils;
029    
030    /**
031     * Implements Chi-Square test statistics defined in the
032     * {@link UnknownDistributionChiSquareTest} interface.
033     *
034     * @version $Id: ChiSquareTestImpl.java 1159916 2011-08-20 21:08:34Z psteitz $
035     */
036    public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
037    
038        /**
039         * Construct a ChiSquareTestImpl
040         */
041        public ChiSquareTestImpl() {
042            super();
043        }
044    
045         /**
046         * {@inheritDoc}
047         * <p><strong>Note: </strong>This implementation rescales the
048         * <code>expected</code> array if necessary to ensure that the sum of the
049         * expected and observed counts are equal.</p>
050         *
051         * @param observed array of observed frequency counts
052         * @param expected array of expected frequency counts
053         * @return chi-square test statistic
054         * @throws DimensionMismatchException if the arrays length is less than 2.
055         */
056        public double chiSquare(double[] expected, long[] observed) {
057            if (expected.length < 2) {
058                throw new DimensionMismatchException(expected.length, 2);
059            }
060            if (expected.length != observed.length) {
061                throw new DimensionMismatchException(expected.length, observed.length);
062            }
063            checkPositive(expected);
064            checkNonNegative(observed);
065            double sumExpected = 0d;
066            double sumObserved = 0d;
067            for (int i = 0; i < observed.length; i++) {
068                sumExpected += expected[i];
069                sumObserved += observed[i];
070            }
071            double ratio = 1.0d;
072            boolean rescale = false;
073            if (FastMath.abs(sumExpected - sumObserved) > 10E-6) {
074                ratio = sumObserved / sumExpected;
075                rescale = true;
076            }
077            double sumSq = 0.0d;
078            for (int i = 0; i < observed.length; i++) {
079                if (rescale) {
080                    final double dev = observed[i] - ratio * expected[i];
081                    sumSq += dev * dev / (ratio * expected[i]);
082                } else {
083                    final double dev = observed[i] - expected[i];
084                    sumSq += dev * dev / expected[i];
085                }
086            }
087            return sumSq;
088        }
089    
090        /**
091         * {@inheritDoc}
092         * <p><strong>Note: </strong>This implementation rescales the
093         * <code>expected</code> array if necessary to ensure that the sum of the
094         * expected and observed counts are equal.</p>
095         *
096         * @param observed array of observed frequency counts
097         * @param expected array of expected frequency counts
098         * @return p-value
099         * @throws MathIllegalArgumentException if preconditions are not met
100         * @throws MathException if an error occurs computing the p-value
101         */
102        public double chiSquareTest(double[] expected, long[] observed)
103            throws MathException {
104            ChiSquaredDistribution distribution =
105                new ChiSquaredDistributionImpl(expected.length - 1.0);
106            return 1.0 - distribution.cumulativeProbability(
107                chiSquare(expected, observed));
108        }
109    
110        /**
111         * {@inheritDoc}
112         * <p><strong>Note: </strong>This implementation rescales the
113         * <code>expected</code> array if necessary to ensure that the sum of the
114         * expected and observed counts are equal.</p>
115         *
116         * @param observed array of observed frequency counts
117         * @param expected array of expected frequency counts
118         * @param alpha significance level of the test
119         * @return true iff null hypothesis can be rejected with confidence
120         * 1 - alpha
121         * @throws MathIllegalArgumentException if preconditions are not met
122         * @throws MathException if an error occurs performing the test
123         */
124        public boolean chiSquareTest(double[] expected, long[] observed,
125                                     double alpha)
126            throws MathException {
127            if ((alpha <= 0) || (alpha > 0.5)) {
128                throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
129                                              alpha, 0, 0.5);
130            }
131            return chiSquareTest(expected, observed) < alpha;
132        }
133    
134        /**
135         * @param counts array representation of 2-way table
136         * @return chi-square test statistic
137         * @throws MathIllegalArgumentException if preconditions are not met.
138         */
139        public double chiSquare(long[][] counts) {
140            checkArray(counts);
141            int nRows = counts.length;
142            int nCols = counts[0].length;
143    
144            // compute row, column and total sums
145            double[] rowSum = new double[nRows];
146            double[] colSum = new double[nCols];
147            double total = 0.0d;
148            for (int row = 0; row < nRows; row++) {
149                for (int col = 0; col < nCols; col++) {
150                    rowSum[row] += counts[row][col];
151                    colSum[col] += counts[row][col];
152                    total += counts[row][col];
153                }
154            }
155    
156            // compute expected counts and chi-square
157            double sumSq = 0.0d;
158            double expected = 0.0d;
159            for (int row = 0; row < nRows; row++) {
160                for (int col = 0; col < nCols; col++) {
161                    expected = (rowSum[row] * colSum[col]) / total;
162                    sumSq += ((counts[row][col] - expected) *
163                            (counts[row][col] - expected)) / expected;
164                }
165            }
166            return sumSq;
167        }
168    
169        /**
170         * @param counts array representation of 2-way table
171         * @return p-value
172         * @throws MathIllegalArgumentException if preconditions are not met
173         * @throws MathException if an error occurs computing the p-value
174         */
175        public double chiSquareTest(long[][] counts)
176        throws MathException {
177            checkArray(counts);
178            double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
179            ChiSquaredDistribution distribution = new ChiSquaredDistributionImpl(df);
180            return 1 - distribution.cumulativeProbability(chiSquare(counts));
181        }
182    
183        /**
184         * @param counts array representation of 2-way table
185         * @param alpha significance level of the test
186         * @return true iff null hypothesis can be rejected with confidence
187         * 1 - alpha
188         * @throws MathIllegalArgumentException if preconditions are not met
189         * @throws MathException if an error occurs performing the test
190         */
191        public boolean chiSquareTest(long[][] counts, double alpha)
192        throws MathException {
193            if ((alpha <= 0) || (alpha > 0.5)) {
194                throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
195                                              alpha, 0, 0.5);
196            }
197            return chiSquareTest(counts) < alpha;
198        }
199    
200        /**
201         * @param observed1 array of observed frequency counts of the first data set
202         * @param observed2 array of observed frequency counts of the second data set
203         * @return chi-square test statistic
204         * @throws MathIllegalArgumentException if preconditions are not met
205         * @since 1.2
206         */
207        public double chiSquareDataSetsComparison(long[] observed1, long[] observed2) {
208            // Make sure lengths are same
209            if (observed1.length < 2) {
210                throw new DimensionMismatchException(observed1.length, 2);
211            }
212            if (observed1.length != observed2.length) {
213                throw new DimensionMismatchException(observed1.length, observed2.length);
214            }
215    
216            // Ensure non-negative counts
217            checkNonNegative(observed1);
218            checkNonNegative(observed2);
219    
220            // Compute and compare count sums
221            long countSum1 = 0;
222            long countSum2 = 0;
223            boolean unequalCounts = false;
224            double weight = 0.0;
225            for (int i = 0; i < observed1.length; i++) {
226                countSum1 += observed1[i];
227                countSum2 += observed2[i];
228            }
229            // Ensure neither sample is uniformly 0
230            if (countSum1 == 0) {
231                throw new MathIllegalArgumentException(LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 1);
232            }
233            if (countSum2 == 0) {
234                throw new MathIllegalArgumentException(LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 2);
235            }
236            // Compare and compute weight only if different
237            unequalCounts = countSum1 != countSum2;
238            if (unequalCounts) {
239                weight = FastMath.sqrt((double) countSum1 / (double) countSum2);
240            }
241            // Compute ChiSquare statistic
242            double sumSq = 0.0d;
243            double dev = 0.0d;
244            double obs1 = 0.0d;
245            double obs2 = 0.0d;
246            for (int i = 0; i < observed1.length; i++) {
247                if (observed1[i] == 0 && observed2[i] == 0) {
248                    throw new MathIllegalArgumentException(LocalizedFormats.OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY, i);
249                } else {
250                    obs1 = observed1[i];
251                    obs2 = observed2[i];
252                    if (unequalCounts) { // apply weights
253                        dev = obs1/weight - obs2 * weight;
254                    } else {
255                        dev = obs1 - obs2;
256                    }
257                    sumSq += (dev * dev) / (obs1 + obs2);
258                }
259            }
260            return sumSq;
261        }
262    
263        /**
264         * @param observed1 array of observed frequency counts of the first data set
265         * @param observed2 array of observed frequency counts of the second data set
266         * @return p-value
267         * @throws MathIllegalArgumentException if preconditions are not met
268         * @throws MathException if an error occurs computing the p-value
269         * @since 1.2
270         */
271        public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
272            throws MathException {
273            ChiSquaredDistribution distribution =
274                new ChiSquaredDistributionImpl((double) observed1.length - 1);
275            return 1 - distribution.cumulativeProbability(
276                    chiSquareDataSetsComparison(observed1, observed2));
277        }
278    
279        /**
280         * @param observed1 array of observed frequency counts of the first data set
281         * @param observed2 array of observed frequency counts of the second data set
282         * @param alpha significance level of the test
283         * @return true iff null hypothesis can be rejected with confidence
284         * 1 - alpha
285         * @throws MathIllegalArgumentException if preconditions are not met
286         * @throws MathException if an error occurs performing the test
287         * @since 1.2
288         */
289        public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
290                                                       double alpha)
291            throws MathException {
292            if (alpha <= 0 ||
293                alpha > 0.5) {
294                throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
295                                              alpha, 0, 0.5);
296            }
297            return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
298        }
299    
300        /**
301         * Checks to make sure that the input long[][] array is rectangular,
302         * has at least 2 rows and 2 columns, and has all non-negative entries,
303         * throwing MathIllegalArgumentException if any of these checks fail.
304         *
305         * @param in input 2-way table to check
306         * @throws MathIllegalArgumentException if the array is not valid
307         */
308        private void checkArray(long[][] in) {
309            if (in.length < 2) {
310                throw new MathIllegalArgumentException(
311                        LocalizedFormats.INSUFFICIENT_DIMENSION, in.length, 2);
312            }
313    
314            if (in[0].length < 2) {
315                throw new MathIllegalArgumentException(
316                        LocalizedFormats.INSUFFICIENT_DIMENSION, in[0].length, 2);
317            }
318    
319            checkRectangular(in);
320            checkNonNegative(in);
321    
322        }
323    
324        //---------------------  Private array methods -- should find a utility home for these
325    
326        /**
327         * Throws MathIllegalArgumentException if the input array is not rectangular.
328         *
329         * @param in array to be tested
330         * @throws NullArgumentException if input array is null
331         * @throws MathIllegalArgumentException if input array is not rectangular
332         */
333        private void checkRectangular(long[][] in)
334            throws NullArgumentException {
335            MathUtils.checkNotNull(in);
336            for (int i = 1; i < in.length; i++) {
337                if (in[i].length != in[0].length) {
338                    throw new DimensionMismatchException(LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
339                                                         in[i].length, in[0].length);
340                }
341            }
342        }
343    
344        /**
345         * Check all entries of the input array are strictly positive.
346         *
347         * @param in Array to be tested.
348         * @exception MathIllegalArgumentException if one entry is not positive.
349         */
350        private void checkPositive(double[] in) {
351            for (int i = 0; i < in.length; i++) {
352                if (in[i] <= 0) {
353                    throw new MathIllegalArgumentException(
354                            LocalizedFormats.NOT_POSITIVE_ELEMENT_AT_INDEX,
355                            i, in[i]);
356                }
357            }
358        }
359    
360        /**
361         * Check all entries of the input array are >= 0.
362         *
363         * @param in Array to be tested.
364         * @exception MathIllegalArgumentException if one entry is negative.
365         */
366        private void checkNonNegative(long[] in) {
367            for (int i = 0; i < in.length; i++) {
368                if (in[i] < 0) {
369                    throw new MathIllegalArgumentException(
370                            LocalizedFormats.NEGATIVE_ELEMENT_AT_INDEX,
371                            i, in[i]);
372                }
373            }
374        }
375    
376        /**
377         * Check all entries of the input array are >= 0.
378         *
379         * @param in Array to be tested.
380         * @exception MathIllegalArgumentException if one entry is negative.
381         */
382        private void checkNonNegative(long[][] in) {
383            for (int i = 0; i < in.length; i ++) {
384                for (int j = 0; j < in[i].length; j++) {
385                    if (in[i][j] < 0) {
386                        throw new MathIllegalArgumentException(
387                                LocalizedFormats.NEGATIVE_ELEMENT_AT_2D_INDEX,
388                                i, j, in[i][j]);
389                    }
390                }
391            }
392        }
393    }