001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.stat.regression;
019    import java.io.Serializable;
020    
021    import org.apache.commons.math.MathException;
022    import org.apache.commons.math.exception.OutOfRangeException;
023    import org.apache.commons.math.distribution.TDistribution;
024    import org.apache.commons.math.distribution.TDistributionImpl;
025    import org.apache.commons.math.exception.MathIllegalArgumentException;
026    import org.apache.commons.math.exception.NoDataException;
027    import org.apache.commons.math.exception.util.LocalizedFormats;
028    import org.apache.commons.math.util.FastMath;
029    import org.apache.commons.math.util.MathUtils;
030    
031    /**
032     * Estimates an ordinary least squares regression model
033     * with one independent variable.
034     * <p>
035     * <code> y = intercept + slope * x  </code></p>
036     * <p>
037     * Standard errors for <code>intercept</code> and <code>slope</code> are
038     * available as well as ANOVA, r-square and Pearson's r statistics.</p>
039     * <p>
040     * Observations (x,y pairs) can be added to the model one at a time or they
041     * can be provided in a 2-dimensional array.  The observations are not stored
042     * in memory, so there is no limit to the number of observations that can be
043     * added to the model.</p>
044     * <p>
045     * <strong>Usage Notes</strong>: <ul>
046     * <li> When there are fewer than two observations in the model, or when
047     * there is no variation in the x values (i.e. all x values are the same)
048     * all statistics return <code>NaN</code>. At least two observations with
049     * different x coordinates are required to estimate a bivariate regression
050     * model.
051     * </li>
052     * <li> getters for the statistics always compute values based on the current
053     * set of observations -- i.e., you can get statistics, then add more data
054     * and get updated statistics without using a new instance.  There is no
055     * "compute" method that updates all statistics.  Each of the getters performs
056     * the necessary computations to return the requested statistic.</li>
057     * </ul></p>
058     *
059     * @version $Id: SimpleRegression.java 1174509 2011-09-23 03:36:11Z gregs $
060     */
061    public class SimpleRegression implements Serializable, UpdatingMultipleLinearRegression {
062    
063        /** Serializable version identifier */
064        private static final long serialVersionUID = -3004689053607543335L;
065    
066        /** sum of x values */
067        private double sumX = 0d;
068    
069        /** total variation in x (sum of squared deviations from xbar) */
070        private double sumXX = 0d;
071    
072        /** sum of y values */
073        private double sumY = 0d;
074    
075        /** total variation in y (sum of squared deviations from ybar) */
076        private double sumYY = 0d;
077    
078        /** sum of products */
079        private double sumXY = 0d;
080    
081        /** number of observations */
082        private long n = 0;
083    
084        /** mean of accumulated x values, used in updating formulas */
085        private double xbar = 0;
086    
087        /** mean of accumulated y values, used in updating formulas */
088        private double ybar = 0;
089    
090        /** include an intercept or not */
091        private final boolean hasIntercept;
092        // ---------------------Public methods--------------------------------------
093    
094        /**
095         * Create an empty SimpleRegression instance
096         */
097        public SimpleRegression() {
098            this(true);
099        }
100        /**
101        * Secondary constructor which allows the user the ability to include/exclude const
102        * @param includeIntercept boolean flag, true includes an intercept
103        */
104        public SimpleRegression(boolean includeIntercept) {
105            super();
106            hasIntercept = includeIntercept;
107        }
108    
109        /**
110         * Adds the observation (x,y) to the regression data set.
111         * <p>
112         * Uses updating formulas for means and sums of squares defined in
113         * "Algorithms for Computing the Sample Variance: Analysis and
114         * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J.
115         * 1983, American Statistician, vol. 37, pp. 242-247, referenced in
116         * Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985.</p>
117         *
118         *
119         * @param x independent variable value
120         * @param y dependent variable value
121         */
122        public void addData(final double x,final double y) {
123            if (n == 0) {
124                xbar = x;
125                ybar = y;
126            } else {
127                if( hasIntercept ){
128                    final double fact1 = 1.0 + (double) n;
129                    final double fact2 = ((double) n) / (1.0 + (double) n);
130                    final double dx = x - xbar;
131                    final double dy = y - ybar;
132                    sumXX += dx * dx * fact2;
133                    sumYY += dy * dy * fact2;
134                    sumXY += dx * dy * fact2;
135                    xbar += dx / fact1;
136                    ybar += dy / fact1;
137                }
138             }
139            if( !hasIntercept ){
140                sumXX += x * x ;
141                sumYY += y * y ;
142                sumXY += x * y ;
143            }
144            sumX += x;
145            sumY += y;
146            n++;
147            return;
148        }
149    
150    
151        /**
152         * Removes the observation (x,y) from the regression data set.
153         * <p>
154         * Mirrors the addData method.  This method permits the use of
155         * SimpleRegression instances in streaming mode where the regression
156         * is applied to a sliding "window" of observations, however the caller is
157         * responsible for maintaining the set of observations in the window.</p>
158         *
159         * The method has no effect if there are no points of data (i.e. n=0)
160         *
161         * @param x independent variable value
162         * @param y dependent variable value
163         */
164        public void removeData(final double x,final double y) {
165            if (n > 0) {
166                if (hasIntercept) {
167                    final double fact1 = (double) n - 1.0;
168                    final double fact2 = ((double) n) / ((double) n - 1.0);
169                    final double dx = x - xbar;
170                    final double dy = y - ybar;
171                    sumXX -= dx * dx * fact2;
172                    sumYY -= dy * dy * fact2;
173                    sumXY -= dx * dy * fact2;
174                    xbar -= dx / fact1;
175                    ybar -= dy / fact1;
176                } else {
177                    final double fact1 = (double) n - 1.0;
178                    sumXX -= x * x;
179                    sumYY -= y * y;
180                    sumXY -= x * y;
181                    xbar -= x / fact1;
182                    ybar -= y / fact1;
183                }
184                 sumX -= x;
185                 sumY -= y;
186                 n--;
187            }
188            return;
189        }
190    
191        /**
192         * Adds the observations represented by the elements in
193         * <code>data</code>.
194         * <p>
195         * <code>(data[0][0],data[0][1])</code> will be the first observation, then
196         * <code>(data[1][0],data[1][1])</code>, etc.</p>
197         * <p>
198         * This method does not replace data that has already been added.  The
199         * observations represented by <code>data</code> are added to the existing
200         * dataset.</p>
201         * <p>
202         * To replace all data, use <code>clear()</code> before adding the new
203         * data.</p>
204         *
205         * @param data array of observations to be added
206         * @throws ModelSpecificationException if the length of {@code data[i]} is not
207         * greater than or equal to 2
208         */
209        public void addData(final double[][] data) {
210            for (int i = 0; i < data.length; i++) {
211                if( data[i].length < 2 ){
212                   throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
213                        data[i].length, 2);
214                }
215                addData(data[i][0], data[i][1]);
216            }
217            return;
218        }
219    
220        /**
221         * Adds one observation to the regression model.
222         *
223         * @param x the independent variables which form the design matrix
224         * @param y the dependent or response variable
225         * @throws ModelSpecificationException if the length of {@code x} does not equal
226         * the number of independent variables in the model
227         */
228        public void addObservation(final double[] x,final double y) throws ModelSpecificationException{
229            if( x == null || x.length == 0 ){
230                throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,x!=null?x.length:0, 1);
231            }
232            addData( x[0], y );
233            return;
234        }
235    
236        /**
237         * Adds a series of observations to the regression model. The lengths of
238         * x and y must be the same and x must be rectangular.
239         *
240         * @param x a series of observations on the independent variables
241         * @param y a series of observations on the dependent variable
242         * The length of x and y must be the same
243         * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
244         * the length of {@code y} or does not contain sufficient data to estimate the model
245         */
246        public void addObservations(final double[][] x,final double[] y) {
247            if ((x == null) || (y == null) || (x.length != y.length)) {
248                throw new ModelSpecificationException(
249                      LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
250                      (x == null) ? 0 : x.length,
251                      (y == null) ? 0 : y.length);
252            }
253            boolean obsOk=true;
254            for( int i = 0 ; i < x.length; i++){
255                if( x[i] == null || x[i].length == 0 ){
256                    obsOk = false;
257                }
258            }
259            if( !obsOk ){
260                throw new ModelSpecificationException(
261                      LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
262                      0, 1);
263            }
264            for( int i = 0 ; i < x.length ; i++){
265                addData( x[i][0], y[i] );
266            }
267            return;
268        }
269    
270        /**
271         * Removes observations represented by the elements in <code>data</code>.
272          * <p>
273         * If the array is larger than the current n, only the first n elements are
274         * processed.  This method permits the use of SimpleRegression instances in
275         * streaming mode where the regression is applied to a sliding "window" of
276         * observations, however the caller is responsible for maintaining the set
277         * of observations in the window.</p>
278         * <p>
279         * To remove all data, use <code>clear()</code>.</p>
280         *
281         * @param data array of observations to be removed
282         */
283        public void removeData(double[][] data) {
284            for (int i = 0; i < data.length && n > 0; i++) {
285                removeData(data[i][0], data[i][1]);
286            }
287        }
288    
289        /**
290         * Clears all data from the model.
291         */
292        public void clear() {
293            sumX = 0d;
294            sumXX = 0d;
295            sumY = 0d;
296            sumYY = 0d;
297            sumXY = 0d;
298            n = 0;
299        }
300    
301        /**
302         * Returns the number of observations that have been added to the model.
303         *
304         * @return n number of observations that have been added.
305         */
306        public long getN() {
307            return n;
308        }
309    
310        /**
311         * Returns the "predicted" <code>y</code> value associated with the
312         * supplied <code>x</code> value,  based on the data that has been
313         * added to the model when this method is activated.
314         * <p>
315         * <code> predict(x) = intercept + slope * x </code></p>
316         * <p>
317         * <strong>Preconditions</strong>: <ul>
318         * <li>At least two observations (with at least two different x values)
319         * must have been added before invoking this method. If this method is
320         * invoked before a model can be estimated, <code>Double,NaN</code> is
321         * returned.
322         * </li></ul></p>
323         *
324         * @param x input <code>x</code> value
325         * @return predicted <code>y</code> value
326         */
327        public double predict(final double x) {
328            final double b1 = getSlope();
329            if (hasIntercept) {
330                return getIntercept(b1) + b1 * x;
331            }
332            return b1 * x;
333        }
334    
335        /**
336         * Returns the intercept of the estimated regression line.
337         * <p>
338         * The least squares estimate of the intercept is computed using the
339         * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
340         * The intercept is sometimes denoted b0.</p>
341         * <p>
342         * <strong>Preconditions</strong>: <ul>
343         * <li>At least two observations (with at least two different x values)
344         * must have been added before invoking this method. If this method is
345         * invoked before a model can be estimated, <code>Double,NaN</code> is
346         * returned.
347         * </li></ul></p>
348         *
349         * @return the intercept of the regression line
350         */
351        public double getIntercept() {
352            return hasIntercept ? getIntercept(getSlope()) : 0.0;
353        }
354    
355        /**
356         * Returns true if a constant has been included false otherwise.
357         *
358         * @return true if constant exists, false otherwise
359         */
360        public boolean hasIntercept() {
361            return hasIntercept;
362        }
363    
364        /**
365        * Returns the slope of the estimated regression line.
366        * <p>
367        * The least squares estimate of the slope is computed using the
368        * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
369        * The slope is sometimes denoted b1.</p>
370        * <p>
371        * <strong>Preconditions</strong>: <ul>
372        * <li>At least two observations (with at least two different x values)
373        * must have been added before invoking this method. If this method is
374        * invoked before a model can be estimated, <code>Double.NaN</code> is
375        * returned.
376        * </li></ul></p>
377        *
378        * @return the slope of the regression line
379        */
380        public double getSlope() {
381            if (n < 2) {
382                return Double.NaN; //not enough data
383            }
384            if (FastMath.abs(sumXX) < 10 * Double.MIN_VALUE) {
385                return Double.NaN; //not enough variation in x
386            }
387            return sumXY / sumXX;
388        }
389    
390        /**
391         * Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
392         * sum of squared errors</a> (SSE) associated with the regression
393         * model.
394         * <p>
395         * The sum is computed using the computational formula</p>
396         * <p>
397         * <code>SSE = SYY - (SXY * SXY / SXX)</code></p>
398         * <p>
399         * where <code>SYY</code> is the sum of the squared deviations of the y
400         * values about their mean, <code>SXX</code> is similarly defined and
401         * <code>SXY</code> is the sum of the products of x and y mean deviations.
402         * </p><p>
403         * The sums are accumulated using the updating algorithm referenced in
404         * {@link #addData}.</p>
405         * <p>
406         * The return value is constrained to be non-negative - i.e., if due to
407         * rounding errors the computational formula returns a negative result,
408         * 0 is returned.</p>
409         * <p>
410         * <strong>Preconditions</strong>: <ul>
411         * <li>At least two observations (with at least two different x values)
412         * must have been added before invoking this method. If this method is
413         * invoked before a model can be estimated, <code>Double,NaN</code> is
414         * returned.
415         * </li></ul></p>
416         *
417         * @return sum of squared errors associated with the regression model
418         */
419        public double getSumSquaredErrors() {
420            return FastMath.max(0d, sumYY - sumXY * sumXY / sumXX);
421        }
422    
423        /**
424         * Returns the sum of squared deviations of the y values about their mean.
425         * <p>
426         * This is defined as SSTO
427         * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
428         * <p>
429         * If <code>n < 2</code>, this returns <code>Double.NaN</code>.</p>
430         *
431         * @return sum of squared deviations of y values
432         */
433        public double getTotalSumSquares() {
434            if (n < 2) {
435                return Double.NaN;
436            }
437            return sumYY;
438        }
439    
440        /**
441         * Returns the sum of squared deviations of the x values about their mean.
442         *
443         * If <code>n < 2</code>, this returns <code>Double.NaN</code>.</p>
444         *
445         * @return sum of squared deviations of x values
446         */
447        public double getXSumSquares() {
448            if (n < 2) {
449                return Double.NaN;
450            }
451            return sumXX;
452        }
453    
454        /**
455         * Returns the sum of crossproducts, x<sub>i</sub>*y<sub>i</sub>.
456         *
457         * @return sum of cross products
458         */
459        public double getSumOfCrossProducts() {
460            return sumXY;
461        }
462    
463        /**
464         * Returns the sum of squared deviations of the predicted y values about
465         * their mean (which equals the mean of y).
466         * <p>
467         * This is usually abbreviated SSR or SSM.  It is defined as SSM
468         * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
469         * <p>
470         * <strong>Preconditions</strong>: <ul>
471         * <li>At least two observations (with at least two different x values)
472         * must have been added before invoking this method. If this method is
473         * invoked before a model can be estimated, <code>Double.NaN</code> is
474         * returned.
475         * </li></ul></p>
476         *
477         * @return sum of squared deviations of predicted y values
478         */
479        public double getRegressionSumSquares() {
480            return getRegressionSumSquares(getSlope());
481        }
482    
483        /**
484         * Returns the sum of squared errors divided by the degrees of freedom,
485         * usually abbreviated MSE.
486         * <p>
487         * If there are fewer than <strong>three</strong> data pairs in the model,
488         * or if there is no variation in <code>x</code>, this returns
489         * <code>Double.NaN</code>.</p>
490         *
491         * @return sum of squared deviations of y values
492         */
493        public double getMeanSquareError() {
494            if (n < 3) {
495                return Double.NaN;
496            }
497            return hasIntercept ? (getSumSquaredErrors() / (n - 2)) : (getSumSquaredErrors() / (n - 1));
498        }
499    
500        /**
501         * Returns <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html">
502         * Pearson's product moment correlation coefficient</a>,
503         * usually denoted r.
504         * <p>
505         * <strong>Preconditions</strong>: <ul>
506         * <li>At least two observations (with at least two different x values)
507         * must have been added before invoking this method. If this method is
508         * invoked before a model can be estimated, <code>Double,NaN</code> is
509         * returned.
510         * </li></ul></p>
511         *
512         * @return Pearson's r
513         */
514        public double getR() {
515            double b1 = getSlope();
516            double result = FastMath.sqrt(getRSquare());
517            if (b1 < 0) {
518                result = -result;
519            }
520            return result;
521        }
522    
523        /**
524         * Returns the <a href="http://www.xycoon.com/coefficient1.htm">
525         * coefficient of determination</a>,
526         * usually denoted r-square.
527         * <p>
528         * <strong>Preconditions</strong>: <ul>
529         * <li>At least two observations (with at least two different x values)
530         * must have been added before invoking this method. If this method is
531         * invoked before a model can be estimated, <code>Double,NaN</code> is
532         * returned.
533         * </li></ul></p>
534         *
535         * @return r-square
536         */
537        public double getRSquare() {
538            double ssto = getTotalSumSquares();
539            return (ssto - getSumSquaredErrors()) / ssto;
540        }
541    
542        /**
543         * Returns the <a href="http://www.xycoon.com/standarderrorb0.htm">
544         * standard error of the intercept estimate</a>,
545         * usually denoted s(b0).
546         * <p>
547         * If there are fewer that <strong>three</strong> observations in the
548         * model, or if there is no variation in x, this returns
549         * <code>Double.NaN</code>.</p> Additionally, a <code>Double.NaN</code> is
550         * returned when the intercept is constrained to be zero
551         *
552         * @return standard error associated with intercept estimate
553         */
554        public double getInterceptStdErr() {
555            if( !hasIntercept ){
556                return Double.NaN;
557            }
558            return FastMath.sqrt(
559                getMeanSquareError() * ((1d / (double) n) + (xbar * xbar) / sumXX));
560        }
561    
562        /**
563         * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
564         * error of the slope estimate</a>,
565         * usually denoted s(b1).
566         * <p>
567         * If there are fewer that <strong>three</strong> data pairs in the model,
568         * or if there is no variation in x, this returns <code>Double.NaN</code>.
569         * </p>
570         *
571         * @return standard error associated with slope estimate
572         */
573        public double getSlopeStdErr() {
574            return FastMath.sqrt(getMeanSquareError() / sumXX);
575        }
576    
577        /**
578         * Returns the half-width of a 95% confidence interval for the slope
579         * estimate.
580         * <p>
581         * The 95% confidence interval is</p>
582         * <p>
583         * <code>(getSlope() - getSlopeConfidenceInterval(),
584         * getSlope() + getSlopeConfidenceInterval())</code></p>
585         * <p>
586         * If there are fewer that <strong>three</strong> observations in the
587         * model, or if there is no variation in x, this returns
588         * <code>Double.NaN</code>.</p>
589         * <p>
590         * <strong>Usage Note</strong>:<br>
591         * The validity of this statistic depends on the assumption that the
592         * observations included in the model are drawn from a
593         * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
594         * Bivariate Normal Distribution</a>.</p>
595         *
596         * @return half-width of 95% confidence interval for the slope estimate
597         * @throws MathException if the confidence interval can not be computed.
598         */
599        public double getSlopeConfidenceInterval() throws MathException {
600            return getSlopeConfidenceInterval(0.05d);
601        }
602    
603        /**
604         * Returns the half-width of a (100-100*alpha)% confidence interval for
605         * the slope estimate.
606         * <p>
607         * The (100-100*alpha)% confidence interval is </p>
608         * <p>
609         * <code>(getSlope() - getSlopeConfidenceInterval(),
610         * getSlope() + getSlopeConfidenceInterval())</code></p>
611         * <p>
612         * To request, for example, a 99% confidence interval, use
613         * <code>alpha = .01</code></p>
614         * <p>
615         * <strong>Usage Note</strong>:<br>
616         * The validity of this statistic depends on the assumption that the
617         * observations included in the model are drawn from a
618         * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
619         * Bivariate Normal Distribution</a>.</p>
620         * <p>
621         * <strong> Preconditions:</strong><ul>
622         * <li>If there are fewer that <strong>three</strong> observations in the
623         * model, or if there is no variation in x, this returns
624         * <code>Double.NaN</code>.
625         * </li>
626         * <li><code>(0 < alpha < 1)</code>; otherwise an
627         * <code>IllegalArgumentException</code> is thrown.
628         * </li></ul></p>
629         *
630         * @param alpha the desired significance level
631         * @return half-width of 95% confidence interval for the slope estimate
632         * @throws MathException if the confidence interval can not be computed.
633         */
634        public double getSlopeConfidenceInterval(final double alpha)
635            throws MathException {
636            if (alpha >= 1 || alpha <= 0) {
637                throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
638                                              alpha, 0, 1);
639            }
640            TDistribution distribution = new TDistributionImpl(n - 2);
641            return getSlopeStdErr() *
642                distribution.inverseCumulativeProbability(1d - alpha / 2d);
643        }
644    
645        /**
646         * Returns the significance level of the slope (equiv) correlation.
647         * <p>
648         * Specifically, the returned value is the smallest <code>alpha</code>
649         * such that the slope confidence interval with significance level
650         * equal to <code>alpha</code> does not include <code>0</code>.
651         * On regression output, this is often denoted <code>Prob(|t| > 0)</code>
652         * </p><p>
653         * <strong>Usage Note</strong>:<br>
654         * The validity of this statistic depends on the assumption that the
655         * observations included in the model are drawn from a
656         * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
657         * Bivariate Normal Distribution</a>.</p>
658         * <p>
659         * If there are fewer that <strong>three</strong> observations in the
660         * model, or if there is no variation in x, this returns
661         * <code>Double.NaN</code>.</p>
662         *
663         * @return significance level for slope/correlation
664         * @throws MathException if the significance level can not be computed.
665         */
666        public double getSignificance() throws MathException {
667            TDistribution distribution = new TDistributionImpl(n - 2);
668            return 2d * (1.0 - distribution.cumulativeProbability(
669                        FastMath.abs(getSlope()) / getSlopeStdErr()));
670        }
671    
672        // ---------------------Private methods-----------------------------------
673    
674        /**
675        * Returns the intercept of the estimated regression line, given the slope.
676        * <p>
677        * Will return <code>NaN</code> if slope is <code>NaN</code>.</p>
678        *
679        * @param slope current slope
680        * @return the intercept of the regression line
681        */
682        private double getIntercept(final double slope) {
683          if( hasIntercept){
684            return (sumY - slope * sumX) / n;
685          }
686          return 0.0;
687        }
688    
689        /**
690         * Computes SSR from b1.
691         *
692         * @param slope regression slope estimate
693         * @return sum of squared deviations of predicted y values
694         */
695        private double getRegressionSumSquares(final double slope) {
696            return slope * slope * sumXX;
697        }
698    
699        /**
700         * Performs a regression on data present in buffers and outputs a RegressionResults object
701         * @return RegressionResults acts as a container of regression output
702         * @throws ModelSpecificationException if the model is not correctly specified
703         */
704        public RegressionResults regress() throws ModelSpecificationException{
705            if( hasIntercept ){
706              if( n < 3 ){
707                  throw new NoDataException( LocalizedFormats.NOT_ENOUGH_DATA_REGRESSION );
708              }
709              if( FastMath.abs( sumXX ) > MathUtils.SAFE_MIN ){
710                  final double[] params = new double[]{ getIntercept(), getSlope() };
711                  final double mse = getMeanSquareError();
712                  final double _syy = sumYY + sumY * sumY / ((double) n);
713                  final double[] vcv = new double[]{
714                    mse * (xbar *xbar /sumXX + 1.0 / ((double) n)),
715                    -xbar*mse/sumXX,
716                    mse/sumXX };
717                  return new RegressionResults(
718                          params, new double[][]{vcv}, true, n, 2,
719                          sumY, _syy, getSumSquaredErrors(),true,false);
720              }else{
721                  final double[] params = new double[]{ sumY/((double) n), Double.NaN };
722                  //final double mse = getMeanSquareError();
723                  final double[] vcv = new double[]{
724                    ybar / ((double) n - 1.0),
725                    Double.NaN,
726                    Double.NaN };
727                  return new RegressionResults(
728                          params, new double[][]{vcv}, true, n, 1,
729                          sumY, sumYY, getSumSquaredErrors(),true,false);
730              }
731            }else{
732              if( n < 2 ){
733                  throw new NoDataException( LocalizedFormats.NOT_ENOUGH_DATA_REGRESSION );
734              }
735              if( !Double.isNaN(sumXX) ){
736              final double[] vcv = new double[]{ getMeanSquareError() / sumXX };
737              final double[] params = new double[]{ sumXY/sumXX };
738              return new RegressionResults(
739                          params, new double[][]{vcv}, true, n, 1,
740                          sumY, sumYY, getSumSquaredErrors(),false,false);
741              }else{
742              final double[] vcv = new double[]{Double.NaN };
743              final double[] params = new double[]{ Double.NaN };
744              return new RegressionResults(
745                          params, new double[][]{vcv}, true, n, 1,
746                          Double.NaN, Double.NaN, Double.NaN,false,false);
747              }
748            }
749        }
750    
751        /**
752         * Performs a regression on data present in buffers including only regressors
753         * indexed in variablesToInclude and outputs a RegressionResults object
754         * @param variablesToInclude an array of indices of regressors to include
755         * @return RegressionResults acts as a container of regression output
756         * @throws ModelSpecificationException if the model is not correctly specified
757         * @throws MathIllegalArgumentException if the variablesToInclude array is null or zero length
758         * @throws OutOfRangeException if a requested variable is not present in model
759         */
760        public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException{
761            if( variablesToInclude == null || variablesToInclude.length == 0){
762              throw new MathIllegalArgumentException(LocalizedFormats.ARRAY_ZERO_LENGTH_OR_NULL_NOTALLOWED);
763            }
764            if( variablesToInclude.length > 2 || (variablesToInclude.length > 1 && !hasIntercept) ){
765                throw new ModelSpecificationException(
766                        LocalizedFormats.ARRAY_SIZE_EXCEEDS_MAX_VARIABLES,
767                        (variablesToInclude.length > 1 && !hasIntercept) ? 1 : 2);
768            }
769    
770            if( hasIntercept ){
771                if( variablesToInclude.length == 2 ){
772                    if( variablesToInclude[0] == 1 ){
773                        throw new ModelSpecificationException(LocalizedFormats.NOT_INCREASING_SEQUENCE);
774                    }else if( variablesToInclude[0] != 0 ){
775                        throw new OutOfRangeException( variablesToInclude[0], 0,1 );
776                    }
777                    if( variablesToInclude[1] != 1){
778                         throw new OutOfRangeException( variablesToInclude[0], 0,1 );
779                    }
780                    return regress();
781                }else{
782                    if( variablesToInclude[0] != 1 && variablesToInclude[0] != 0 ){
783                         throw new OutOfRangeException( variablesToInclude[0],0,1 );
784                    }
785                    final double _mean = sumY * sumY / ((double) n);
786                    final double _syy = sumYY + _mean;
787                    if( variablesToInclude[0] == 0 ){
788                        //just the mean
789                        final double[] vcv = new double[]{ sumYY/((double)((n-1)*n)) };
790                        final double[] params = new double[]{ ybar };
791                        return new RegressionResults(
792                          params, new double[][]{vcv}, true, n, 1,
793                          sumY, _syy+_mean, sumYY,true,false);
794    
795                    }else if( variablesToInclude[0] == 1){
796                        //final double _syy = sumYY + sumY * sumY / ((double) n);
797                        final double _sxx = sumXX + sumX * sumX / ((double) n);
798                        final double _sxy = sumXY + sumX * sumY / ((double) n);
799                        final double _sse = FastMath.max(0d, _syy - _sxy * _sxy / _sxx);
800                        final double _mse = _sse/((double)(n-1));
801                        if( !Double.isNaN(_sxx) ){
802                            final double[] vcv = new double[]{ _mse / _sxx };
803                            final double[] params = new double[]{ _sxy/_sxx };
804                            return new RegressionResults(
805                                        params, new double[][]{vcv}, true, n, 1,
806                                        sumY, _syy, _sse,false,false);
807                        }else{
808                            final double[] vcv = new double[]{Double.NaN };
809                            final double[] params = new double[]{ Double.NaN };
810                            return new RegressionResults(
811                                        params, new double[][]{vcv}, true, n, 1,
812                                        Double.NaN, Double.NaN, Double.NaN,false,false);
813                        }
814                    }
815                }
816            }else{
817                if( variablesToInclude[0] != 0 ){
818                    throw new OutOfRangeException(variablesToInclude[0],0,0);
819                }
820                return regress();
821            }
822    
823            return null;
824        }
825    }