001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math3.stat.descriptive;
019
020 import java.util.ArrayList;
021 import java.util.Collection;
022
023
024 import org.apache.commons.math3.TestUtils;
025 import org.apache.commons.math3.distribution.RealDistribution;
026 import org.apache.commons.math3.distribution.UniformRealDistribution;
027 import org.apache.commons.math3.distribution.IntegerDistribution;
028 import org.apache.commons.math3.distribution.UniformIntegerDistribution;
029 import org.apache.commons.math3.util.Precision;
030 import org.junit.Assert;
031 import org.junit.Test;
032
033
034 /**
035 * Test cases for {@link AggregateSummaryStatistics}
036 *
037 */
038 public class AggregateSummaryStatisticsTest {
039
040 /**
041 * Tests the standard aggregation behavior
042 */
043 @Test
044 public void testAggregation() {
045 AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
046 SummaryStatistics setOneStats = aggregate.createContributingStatistics();
047 SummaryStatistics setTwoStats = aggregate.createContributingStatistics();
048
049 Assert.assertNotNull("The set one contributing stats are null", setOneStats);
050 Assert.assertNotNull("The set two contributing stats are null", setTwoStats);
051 Assert.assertNotSame("Contributing stats objects are the same", setOneStats, setTwoStats);
052
053 setOneStats.addValue(2);
054 setOneStats.addValue(3);
055 setOneStats.addValue(5);
056 setOneStats.addValue(7);
057 setOneStats.addValue(11);
058 Assert.assertEquals("Wrong number of set one values", 5, setOneStats.getN());
059 Assert.assertTrue("Wrong sum of set one values", Precision.equals(28.0, setOneStats.getSum(), 1));
060
061 setTwoStats.addValue(2);
062 setTwoStats.addValue(4);
063 setTwoStats.addValue(8);
064 Assert.assertEquals("Wrong number of set two values", 3, setTwoStats.getN());
065 Assert.assertTrue("Wrong sum of set two values", Precision.equals(14.0, setTwoStats.getSum(), 1));
066
067 Assert.assertEquals("Wrong number of aggregate values", 8, aggregate.getN());
068 Assert.assertTrue("Wrong aggregate sum", Precision.equals(42.0, aggregate.getSum(), 1));
069 }
070
071 /**
072 * Verify that aggregating over a partition gives the same results
073 * as direct computation.
074 *
075 * 1) Randomly generate a dataset of 10-100 values
076 * from [-100, 100]
077 * 2) Divide the dataset it into 2-5 partitions
078 * 3) Create an AggregateSummaryStatistic and ContributingStatistics
079 * for each partition
080 * 4) Compare results from the AggregateSummaryStatistic with values
081 * returned by a single SummaryStatistics instance that is provided
082 * the full dataset
083 */
084 @Test
085 public void testAggregationConsistency() {
086
087 // Generate a random sample and random partition
088 double[] totalSample = generateSample();
089 double[][] subSamples = generatePartition(totalSample);
090 int nSamples = subSamples.length;
091
092 // Create aggregator and total stats for comparison
093 AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
094 SummaryStatistics totalStats = new SummaryStatistics();
095
096 // Create array of component stats
097 SummaryStatistics componentStats[] = new SummaryStatistics[nSamples];
098
099 for (int i = 0; i < nSamples; i++) {
100
101 // Make componentStats[i] a contributing statistic to aggregate
102 componentStats[i] = aggregate.createContributingStatistics();
103
104 // Add values from subsample
105 for (int j = 0; j < subSamples[i].length; j++) {
106 componentStats[i].addValue(subSamples[i][j]);
107 }
108 }
109
110 // Compute totalStats directly
111 for (int i = 0; i < totalSample.length; i++) {
112 totalStats.addValue(totalSample[i]);
113 }
114
115 /*
116 * Compare statistics in totalStats with aggregate.
117 * Note that guaranteed success of this comparison depends on the
118 * fact that <aggregate> gets values in exactly the same order
119 * as <totalStats>.
120 *
121 */
122 Assert.assertEquals(totalStats.getSummary(), aggregate.getSummary());
123
124 }
125
126 /**
127 * Test aggregate function by randomly generating a dataset of 10-100 values
128 * from [-100, 100], dividing it into 2-5 partitions, computing stats for each
129 * partition and comparing the result of aggregate(...) applied to the collection
130 * of per-partition SummaryStatistics with a single SummaryStatistics computed
131 * over the full sample.
132 *
133 */
134 @Test
135 public void testAggregate() {
136
137 // Generate a random sample and random partition
138 double[] totalSample = generateSample();
139 double[][] subSamples = generatePartition(totalSample);
140 int nSamples = subSamples.length;
141
142 // Compute combined stats directly
143 SummaryStatistics totalStats = new SummaryStatistics();
144 for (int i = 0; i < totalSample.length; i++) {
145 totalStats.addValue(totalSample[i]);
146 }
147
148 // Now compute subsample stats individually and aggregate
149 SummaryStatistics[] subSampleStats = new SummaryStatistics[nSamples];
150 for (int i = 0; i < nSamples; i++) {
151 subSampleStats[i] = new SummaryStatistics();
152 }
153 Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
154 for (int i = 0; i < nSamples; i++) {
155 for (int j = 0; j < subSamples[i].length; j++) {
156 subSampleStats[i].addValue(subSamples[i][j]);
157 }
158 aggregate.add(subSampleStats[i]);
159 }
160
161 // Compare values
162 StatisticalSummary aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
163 assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
164 }
165
166
167 @Test
168 public void testAggregateDegenerate() {
169 double[] totalSample = {1, 2, 3, 4, 5};
170 double[][] subSamples = {{1}, {2}, {3}, {4}, {5}};
171
172 // Compute combined stats directly
173 SummaryStatistics totalStats = new SummaryStatistics();
174 for (int i = 0; i < totalSample.length; i++) {
175 totalStats.addValue(totalSample[i]);
176 }
177
178 // Now compute subsample stats individually and aggregate
179 SummaryStatistics[] subSampleStats = new SummaryStatistics[5];
180 for (int i = 0; i < 5; i++) {
181 subSampleStats[i] = new SummaryStatistics();
182 }
183 Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
184 for (int i = 0; i < 5; i++) {
185 for (int j = 0; j < subSamples[i].length; j++) {
186 subSampleStats[i].addValue(subSamples[i][j]);
187 }
188 aggregate.add(subSampleStats[i]);
189 }
190
191 // Compare values
192 StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
193 assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
194 }
195
196 @Test
197 public void testAggregateSpecialValues() {
198 double[] totalSample = {Double.POSITIVE_INFINITY, 2, 3, Double.NaN, 5};
199 double[][] subSamples = {{Double.POSITIVE_INFINITY, 2}, {3}, {Double.NaN}, {5}};
200
201 // Compute combined stats directly
202 SummaryStatistics totalStats = new SummaryStatistics();
203 for (int i = 0; i < totalSample.length; i++) {
204 totalStats.addValue(totalSample[i]);
205 }
206
207 // Now compute subsample stats individually and aggregate
208 SummaryStatistics[] subSampleStats = new SummaryStatistics[5];
209 for (int i = 0; i < 4; i++) {
210 subSampleStats[i] = new SummaryStatistics();
211 }
212 Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
213 for (int i = 0; i < 4; i++) {
214 for (int j = 0; j < subSamples[i].length; j++) {
215 subSampleStats[i].addValue(subSamples[i][j]);
216 }
217 aggregate.add(subSampleStats[i]);
218 }
219
220 // Compare values
221 StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
222 assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
223
224 }
225
226 /**
227 * Verifies that a StatisticalSummary and a StatisticalSummaryValues are equal up
228 * to delta, with NaNs, infinities returned in the same spots. For max, min, n, values
229 * have to agree exactly, delta is used only for sum, mean, variance, std dev.
230 */
231 protected static void assertEquals(StatisticalSummary expected, StatisticalSummary observed, double delta) {
232 TestUtils.assertEquals(expected.getMax(), observed.getMax(), 0);
233 TestUtils.assertEquals(expected.getMin(), observed.getMin(), 0);
234 Assert.assertEquals(expected.getN(), observed.getN());
235 TestUtils.assertEquals(expected.getSum(), observed.getSum(), delta);
236 TestUtils.assertEquals(expected.getMean(), observed.getMean(), delta);
237 TestUtils.assertEquals(expected.getStandardDeviation(), observed.getStandardDeviation(), delta);
238 TestUtils.assertEquals(expected.getVariance(), observed.getVariance(), delta);
239 }
240
241
242 /**
243 * Generates a random sample of double values.
244 * Sample size is random, between 10 and 100 and values are
245 * uniformly distributed over [-100, 100].
246 *
247 * @return array of random double values
248 */
249 private double[] generateSample() {
250 final IntegerDistribution size = new UniformIntegerDistribution(10, 100);
251 final RealDistribution randomData = new UniformRealDistribution(-100, 100);
252 final int sampleSize = size.sample();
253 final double[] out = randomData.sample(sampleSize);
254 return out;
255 }
256
257 /**
258 * Generates a partition of <sample> into up to 5 sequentially selected
259 * subsamples with randomly selected partition points.
260 *
261 * @param sample array to partition
262 * @return rectangular array with rows = subsamples
263 */
264 private double[][] generatePartition(double[] sample) {
265 final int length = sample.length;
266 final double[][] out = new double[5][];
267 int cur = 0; // beginning of current partition segment
268 int offset = 0; // end of current partition segment
269 int sampleCount = 0; // number of segments defined
270 for (int i = 0; i < 5; i++) {
271 if (cur == length || offset == length) {
272 break;
273 }
274 final int next;
275 if (i == 4 || cur == length - 1) {
276 next = length - 1;
277 } else {
278 next = (new UniformIntegerDistribution(cur, length - 1)).sample();
279 }
280 final int subLength = next - cur + 1;
281 out[i] = new double[subLength];
282 System.arraycopy(sample, offset, out[i], 0, subLength);
283 cur = next + 1;
284 sampleCount++;
285 offset += subLength;
286 }
287 if (sampleCount < 5) {
288 double[][] out2 = new double[sampleCount][];
289 for (int j = 0; j < sampleCount; j++) {
290 final int curSize = out[j].length;
291 out2[j] = new double[curSize];
292 System.arraycopy(out[j], 0, out2[j], 0, curSize);
293 }
294 return out2;
295 } else {
296 return out;
297 }
298 }
299
300 }