View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math3.analysis.function;
19  
20  import java.util.Arrays;
21  
22  import org.apache.commons.math3.analysis.FunctionUtils;
23  import org.apache.commons.math3.analysis.UnivariateFunction;
24  import org.apache.commons.math3.analysis.DifferentiableUnivariateFunction;
25  import org.apache.commons.math3.analysis.ParametricUnivariateFunction;
26  import org.apache.commons.math3.analysis.differentiation.DerivativeStructure;
27  import org.apache.commons.math3.analysis.differentiation.UnivariateDifferentiableFunction;
28  import org.apache.commons.math3.exception.NullArgumentException;
29  import org.apache.commons.math3.exception.DimensionMismatchException;
30  import org.apache.commons.math3.util.FastMath;
31  
32  /**
33   * <a href="http://en.wikipedia.org/wiki/Sigmoid_function">
34   *  Sigmoid</a> function.
35   * It is the inverse of the {@link Logit logit} function.
36   * A more flexible version, the generalised logistic, is implemented
37   * by the {@link Logistic} class.
38   *
39   * @since 3.0
40   * @version $Id: Sigmoid.java 1513430 2013-08-13 10:46:48Z erans $
41   */
42  public class Sigmoid implements UnivariateDifferentiableFunction, DifferentiableUnivariateFunction {
43      /** Lower asymptote. */
44      private final double lo;
45      /** Higher asymptote. */
46      private final double hi;
47  
48      /**
49       * Usual sigmoid function, where the lower asymptote is 0 and the higher
50       * asymptote is 1.
51       */
52      public Sigmoid() {
53          this(0, 1);
54      }
55  
56      /**
57       * Sigmoid function.
58       *
59       * @param lo Lower asymptote.
60       * @param hi Higher asymptote.
61       */
62      public Sigmoid(double lo,
63                     double hi) {
64          this.lo = lo;
65          this.hi = hi;
66      }
67  
68      /** {@inheritDoc}
69       * @deprecated as of 3.1, replaced by {@link #value(DerivativeStructure)}
70       */
71      @Deprecated
72      public UnivariateFunction derivative() {
73          return FunctionUtils.toDifferentiableUnivariateFunction(this).derivative();
74      }
75  
76      /** {@inheritDoc} */
77      public double value(double x) {
78          return value(x, lo, hi);
79      }
80  
81      /**
82       * Parametric function where the input array contains the parameters of
83       * the {@link Sigmoid#Sigmoid(double,double) sigmoid function}, ordered
84       * as follows:
85       * <ul>
86       *  <li>Lower asymptote</li>
87       *  <li>Higher asymptote</li>
88       * </ul>
89       */
90      public static class Parametric implements ParametricUnivariateFunction {
91          /**
92           * Computes the value of the sigmoid at {@code x}.
93           *
94           * @param x Value for which the function must be computed.
95           * @param param Values of lower asymptote and higher asymptote.
96           * @return the value of the function.
97           * @throws NullArgumentException if {@code param} is {@code null}.
98           * @throws DimensionMismatchException if the size of {@code param} is
99           * not 2.
100          */
101         public double value(double x, double ... param)
102             throws NullArgumentException,
103                    DimensionMismatchException {
104             validateParameters(param);
105             return Sigmoid.value(x, param[0], param[1]);
106         }
107 
108         /**
109          * Computes the value of the gradient at {@code x}.
110          * The components of the gradient vector are the partial
111          * derivatives of the function with respect to each of the
112          * <em>parameters</em> (lower asymptote and higher asymptote).
113          *
114          * @param x Value at which the gradient must be computed.
115          * @param param Values for lower asymptote and higher asymptote.
116          * @return the gradient vector at {@code x}.
117          * @throws NullArgumentException if {@code param} is {@code null}.
118          * @throws DimensionMismatchException if the size of {@code param} is
119          * not 2.
120          */
121         public double[] gradient(double x, double ... param)
122             throws NullArgumentException,
123                    DimensionMismatchException {
124             validateParameters(param);
125 
126             final double invExp1 = 1 / (1 + FastMath.exp(-x));
127 
128             return new double[] { 1 - invExp1, invExp1 };
129         }
130 
131         /**
132          * Validates parameters to ensure they are appropriate for the evaluation of
133          * the {@link #value(double,double[])} and {@link #gradient(double,double[])}
134          * methods.
135          *
136          * @param param Values for lower and higher asymptotes.
137          * @throws NullArgumentException if {@code param} is {@code null}.
138          * @throws DimensionMismatchException if the size of {@code param} is
139          * not 2.
140          */
141         private void validateParameters(double[] param)
142             throws NullArgumentException,
143                    DimensionMismatchException {
144             if (param == null) {
145                 throw new NullArgumentException();
146             }
147             if (param.length != 2) {
148                 throw new DimensionMismatchException(param.length, 2);
149             }
150         }
151     }
152 
153     /**
154      * @param x Value at which to compute the sigmoid.
155      * @param lo Lower asymptote.
156      * @param hi Higher asymptote.
157      * @return the value of the sigmoid function at {@code x}.
158      */
159     private static double value(double x,
160                                 double lo,
161                                 double hi) {
162         return lo + (hi - lo) / (1 + FastMath.exp(-x));
163     }
164 
165     /** {@inheritDoc}
166      * @since 3.1
167      */
168     public DerivativeStructure value(final DerivativeStructure t)
169         throws DimensionMismatchException {
170 
171         double[] f = new double[t.getOrder() + 1];
172         final double exp = FastMath.exp(-t.getValue());
173         if (Double.isInfinite(exp)) {
174 
175             // special handling near lower boundary, to avoid NaN
176             f[0] = lo;
177             Arrays.fill(f, 1, f.length, 0.0);
178 
179         } else {
180 
181             // the nth order derivative of sigmoid has the form:
182             // dn(sigmoid(x)/dxn = P_n(exp(-x)) / (1+exp(-x))^(n+1)
183             // where P_n(t) is a degree n polynomial with normalized higher term
184             // P_0(t) = 1, P_1(t) = t, P_2(t) = t^2 - t, P_3(t) = t^3 - 4 t^2 + t...
185             // the general recurrence relation for P_n is:
186             // P_n(x) = n t P_(n-1)(t) - t (1 + t) P_(n-1)'(t)
187             final double[] p = new double[f.length];
188 
189             final double inv   = 1 / (1 + exp);
190             double coeff = hi - lo;
191             for (int n = 0; n < f.length; ++n) {
192 
193                 // update and evaluate polynomial P_n(t)
194                 double v = 0;
195                 p[n] = 1;
196                 for (int k = n; k >= 0; --k) {
197                     v = v * exp + p[k];
198                     if (k > 1) {
199                         p[k - 1] = (n - k + 2) * p[k - 2] - (k - 1) * p[k - 1];
200                     } else {
201                         p[0] = 0;
202                     }
203                 }
204 
205                 coeff *= inv;
206                 f[n]   = coeff * v;
207 
208             }
209 
210             // fix function value
211             f[0] += lo;
212 
213         }
214 
215         return t.compose(f);
216 
217     }
218 
219 }