001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.statistics.descriptive; 018 019/** 020 * Computes the standard deviation of the available values. The default implementation uses the 021 * following definition of the <em>sample standard deviation</em>: 022 * 023 * <p>\[ \sqrt{ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 } \] 024 * 025 * <p>where \( \overline{x} \) is the sample mean, and \( n \) is the number of samples. 026 * 027 * <ul> 028 * <li>The result is {@code NaN} if no values are added. 029 * <li>The result is zero if there is one value in the data set. 030 * </ul> 031 * 032 * <p>The use of the term \( n − 1 \) is called Bessel's correction. Omitting the square root, 033 * this provides an unbiased estimator of the variance of a hypothetical infinite population. If the 034 * {@link #setBiased(boolean) biased} option is enabled the normalisation factor is 035 * changed to \( \frac{1}{n} \) for a biased estimator of the <em>sample variance</em>. 036 * Note however that square root is a concave function and thus introduces negative bias 037 * (by Jensen's inequality), which depends on the distribution, and thus the corrected sample 038 * standard deviation (using Bessel's correction) is less biased, but still biased. 039 * 040 * <p>The implementation uses an exact integer sum to compute the scaled (by \( n \)) 041 * sum of squared deviations from the mean; this is normalised by the scaled correction factor. 042 * 043 * <p>\[ \frac {n \times \sum_{i=1}^n x_i^2 - (\sum_{i=1}^n x_i)^2}{n \times (n - 1)} \] 044 * 045 * <p>Supports up to 2<sup>63</sup> (exclusive) observations. 046 * This implementation does not check for overflow of the count. 047 * 048 * <p>This class is designed to work with (though does not require) 049 * {@linkplain java.util.stream streams}. 050 * 051 * <p><strong>This implementation is not thread safe.</strong> 052 * If multiple threads access an instance of this class concurrently, 053 * and at least one of the threads invokes the {@link java.util.function.IntConsumer#accept(int) accept} or 054 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally. 055 * 056 * <p>However, it is safe to use {@link java.util.function.IntConsumer#accept(int) accept} 057 * and {@link StatisticAccumulator#combine(StatisticResult) combine} 058 * as {@code accumulator} and {@code combiner} functions of 059 * {@link java.util.stream.Collector Collector} on a parallel stream, 060 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()} 061 * provides the necessary partitioning, isolation, and merging of results for 062 * safe and efficient parallel execution. 063 * 064 * @see <a href="https://en.wikipedia.org/wiki/Standard_deviation">Standard deviation (Wikipedia)</a> 065 * @see <a href="https://en.wikipedia.org/wiki/Bessel%27s_correction">Bessel's correction</a> 066 * @see <a href="https://en.wikipedia.org/wiki/Jensen%27s_inequality">Jensen's inequality</a> 067 * @see IntVariance 068 * @since 1.1 069 */ 070public final class IntStandardDeviation implements IntStatistic, StatisticAccumulator<IntStandardDeviation> { 071 072 /** Sum of the squared values. */ 073 private final UInt128 sumSq; 074 /** Sum of the values. */ 075 private final Int128 sum; 076 /** Count of values that have been added. */ 077 private long n; 078 079 /** Flag to control if the statistic is biased, or should use a bias correction. */ 080 private boolean biased; 081 082 /** 083 * Create an instance. 084 */ 085 private IntStandardDeviation() { 086 this(UInt128.create(), Int128.create(), 0); 087 } 088 089 /** 090 * Create an instance. 091 * 092 * @param sumSq Sum of the squared values. 093 * @param sum Sum of the values. 094 * @param n Count of values that have been added. 095 */ 096 private IntStandardDeviation(UInt128 sumSq, Int128 sum, int n) { 097 this.sumSq = sumSq; 098 this.sum = sum; 099 this.n = n; 100 } 101 102 /** 103 * Creates an instance. 104 * 105 * <p>The initial result is {@code NaN}. 106 * 107 * @return {@code IntStandardDeviation} instance. 108 */ 109 public static IntStandardDeviation create() { 110 return new IntStandardDeviation(); 111 } 112 113 /** 114 * Returns an instance populated using the input {@code values}. 115 * 116 * @param values Values. 117 * @return {@code IntStandardDeviation} instance. 118 */ 119 public static IntStandardDeviation of(int... values) { 120 return createFromRange(values, 0, values.length); 121 } 122 123 /** 124 * Returns an instance populated using the specified range of {@code values}. 125 * 126 * @param values Values. 127 * @param from Inclusive start of the range. 128 * @param to Exclusive end of the range. 129 * @return {@code IntStandardDeviation} instance. 130 * @throws IndexOutOfBoundsException if the sub-range is out of bounds 131 * @since 1.2 132 */ 133 public static IntStandardDeviation ofRange(int[] values, int from, int to) { 134 Statistics.checkFromToIndex(from, to, values.length); 135 return createFromRange(values, from, to); 136 } 137 138 /** 139 * Create an instance using the specified range of {@code values}. 140 * 141 * <p>Warning: No range checks are performed. 142 * 143 * @param values Values. 144 * @param from Inclusive start of the range. 145 * @param to Exclusive end of the range. 146 * @return {@code IntStandardDeviation} instance. 147 */ 148 static IntStandardDeviation createFromRange(int[] values, int from, int to) { 149 // Small arrays can be processed using the object 150 final int length = to - from; 151 if (length < IntVariance.SMALL_SAMPLE) { 152 final IntStandardDeviation stat = new IntStandardDeviation(); 153 for (int i = from; i < to; i++) { 154 stat.accept(values[i]); 155 } 156 return stat; 157 } 158 159 // Arrays can be processed using specialised counts knowing the maximum limit 160 // for an array is 2^31 values. 161 long s = 0; 162 final UInt96 ss = UInt96.create(); 163 // Process pairs as we know two maximum value int^2 will not overflow 164 // an unsigned long. 165 final int end = from + (length & ~0x1); 166 for (int i = from; i < end; i += 2) { 167 final long x = values[i]; 168 final long y = values[i + 1]; 169 s += x + y; 170 ss.addPositive(x * x + y * y); 171 } 172 if (end < to) { 173 final long x = values[end]; 174 s += x; 175 ss.addPositive(x * x); 176 } 177 178 // Convert 179 return new IntStandardDeviation(UInt128.of(ss), Int128.of(s), length); 180 } 181 182 /** 183 * Updates the state of the statistic to reflect the addition of {@code value}. 184 * 185 * @param value Value. 186 */ 187 @Override 188 public void accept(int value) { 189 sumSq.addPositive((long) value * value); 190 sum.add(value); 191 n++; 192 } 193 194 /** 195 * Gets the standard deviation of all input values. 196 * 197 * <p>When no values have been added, the result is {@code NaN}. 198 * 199 * @return standard deviation of all values. 200 */ 201 @Override 202 public double getAsDouble() { 203 return IntVariance.computeVarianceOrStd(sumSq, sum, n, biased, true); 204 } 205 206 @Override 207 public IntStandardDeviation combine(IntStandardDeviation other) { 208 sumSq.add(other.sumSq); 209 sum.add(other.sum); 210 n += other.n; 211 return this; 212 } 213 214 /** 215 * Sets the value of the biased flag. The default value is {@code false}. The bias 216 * term refers to the computation of the variance; the standard deviation is returned 217 * as the square root of the biased or unbiased <em>sample variance</em>. For further 218 * details see {@link IntVariance#setBiased(boolean) IntVariance.setBiased}. 219 * 220 * <p>This flag only controls the final computation of the statistic. The value of 221 * this flag will not affect compatibility between instances during a 222 * {@link #combine(IntStandardDeviation) combine} operation. 223 * 224 * @param v Value. 225 * @return {@code this} instance 226 * @see IntVariance#setBiased(boolean) 227 */ 228 public IntStandardDeviation setBiased(boolean v) { 229 biased = v; 230 return this; 231 } 232}