001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.correlation;
018    
019    import org.apache.commons.math.MathRuntimeException;
020    import org.apache.commons.math.exception.util.LocalizedFormats;
021    import org.apache.commons.math.linear.RealMatrix;
022    import org.apache.commons.math.linear.BlockRealMatrix;
023    import org.apache.commons.math.stat.descriptive.moment.Mean;
024    import org.apache.commons.math.stat.descriptive.moment.Variance;
025    
026    /**
027     * Computes covariances for pairs of arrays or columns of a matrix.
028     *
029     * <p>The constructors that take <code>RealMatrix</code> or
030     * <code>double[][]</code> arguments generate covariance matrices.  The
031     * columns of the input matrices are assumed to represent variable values.</p>
032     *
033     * <p>The constructor argument <code>biasCorrected</code> determines whether or
034     * not computed covariances are bias-corrected.</p>
035     *
036     * <p>Unbiased covariances are given by the formula</p>
037     * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
038     * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
039     * is the mean of the <code>Y</code> values.
040     *
041     * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>.</p>
042     *
043     * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
044     * @since 2.0
045     */
046    public class Covariance {
047    
048        /** covariance matrix */
049        private final RealMatrix covarianceMatrix;
050    
051        /**
052         * Create an empty covariance matrix.
053         */
054        /** Number of observations (length of covariate vectors) */
055        private final int n;
056    
057        /**
058         * Create a Covariance with no data
059         */
060        public Covariance() {
061            super();
062            covarianceMatrix = null;
063            n = 0;
064        }
065    
066        /**
067         * Create a Covariance matrix from a rectangular array
068         * whose columns represent covariates.
069         *
070         * <p>The <code>biasCorrected</code> parameter determines whether or not
071         * covariance estimates are bias-corrected.</p>
072         *
073         * <p>The input array must be rectangular with at least two columns
074         * and two rows.</p>
075         *
076         * @param data rectangular array with columns representing covariates
077         * @param biasCorrected true means covariances are bias-corrected
078         * @throws IllegalArgumentException if the input data array is not
079         * rectangular with at least two rows and two columns.
080         */
081        public Covariance(double[][] data, boolean biasCorrected) {
082            this(new BlockRealMatrix(data), biasCorrected);
083        }
084    
085        /**
086         * Create a Covariance matrix from a rectangular array
087         * whose columns represent covariates.
088         *
089         * <p>The input array must be rectangular with at least two columns
090         * and two rows</p>
091         *
092         * @param data rectangular array with columns representing covariates
093         * @throws IllegalArgumentException if the input data array is not
094         * rectangular with at least two rows and two columns.
095         */
096        public Covariance(double[][] data) {
097            this(data, true);
098        }
099    
100        /**
101         * Create a covariance matrix from a matrix whose columns
102         * represent covariates.
103         *
104         * <p>The <code>biasCorrected</code> parameter determines whether or not
105         * covariance estimates are bias-corrected.</p>
106         *
107         * <p>The matrix must have at least two columns and two rows</p>
108         *
109         * @param matrix matrix with columns representing covariates
110         * @param biasCorrected true means covariances are bias-corrected
111         * @throws IllegalArgumentException if the input matrix does not have
112         * at least two rows and two columns
113         */
114        public Covariance(RealMatrix matrix, boolean biasCorrected) {
115           checkSufficientData(matrix);
116           n = matrix.getRowDimension();
117           covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
118        }
119    
120        /**
121         * Create a covariance matrix from a matrix whose columns
122         * represent covariates.
123         *
124         * <p>The matrix must have at least two columns and two rows</p>
125         *
126         * @param matrix matrix with columns representing covariates
127         * @throws IllegalArgumentException if the input matrix does not have
128         * at least two rows and two columns
129         */
130        public Covariance(RealMatrix matrix) {
131            this(matrix, true);
132        }
133    
134        /**
135         * Returns the covariance matrix
136         *
137         * @return covariance matrix
138         */
139        public RealMatrix getCovarianceMatrix() {
140            return covarianceMatrix;
141        }
142    
143        /**
144         * Returns the number of observations (length of covariate vectors)
145         *
146         * @return number of observations
147         */
148    
149        public int getN() {
150            return n;
151        }
152    
153        /**
154         * Compute a covariance matrix from a matrix whose columns represent
155         * covariates.
156         * @param matrix input matrix (must have at least two columns and two rows)
157         * @param biasCorrected determines whether or not covariance estimates are bias-corrected
158         * @return covariance matrix
159         */
160        protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
161            int dimension = matrix.getColumnDimension();
162            Variance variance = new Variance(biasCorrected);
163            RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
164            for (int i = 0; i < dimension; i++) {
165                for (int j = 0; j < i; j++) {
166                  double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
167                  outMatrix.setEntry(i, j, cov);
168                  outMatrix.setEntry(j, i, cov);
169                }
170                outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
171            }
172            return outMatrix;
173        }
174    
175        /**
176         * Create a covariance matrix from a matrix whose columns represent
177         * covariates. Covariances are computed using the bias-corrected formula.
178         * @param matrix input matrix (must have at least two columns and two rows)
179         * @return covariance matrix
180         * @see #Covariance
181         */
182        protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) {
183            return computeCovarianceMatrix(matrix, true);
184        }
185    
186        /**
187         * Compute a covariance matrix from a rectangular array whose columns represent
188         * covariates.
189         * @param data input array (must have at least two columns and two rows)
190         * @param biasCorrected determines whether or not covariance estimates are bias-corrected
191         * @return covariance matrix
192         */
193        protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) {
194            return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
195        }
196    
197        /**
198         * Create a covariance matrix from a rectangual array whose columns represent
199         * covariates. Covariances are computed using the bias-corrected formula.
200         * @param data input array (must have at least two columns and two rows)
201         * @return covariance matrix
202         * @see #Covariance
203         */
204        protected RealMatrix computeCovarianceMatrix(double[][] data) {
205            return computeCovarianceMatrix(data, true);
206        }
207    
208        /**
209         * Computes the covariance between the two arrays.
210         *
211         * <p>Array lengths must match and the common length must be at least 2.</p>
212         *
213         * @param xArray first data array
214         * @param yArray second data array
215         * @param biasCorrected if true, returned value will be bias-corrected
216         * @return returns the covariance for the two arrays
217         * @throws  IllegalArgumentException if the arrays lengths do not match or
218         * there is insufficient data
219         */
220        public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
221            throws IllegalArgumentException {
222            Mean mean = new Mean();
223            double result = 0d;
224            int length = xArray.length;
225            if (length != yArray.length) {
226                throw MathRuntimeException.createIllegalArgumentException(
227                      LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
228            } else if (length < 2) {
229                throw MathRuntimeException.createIllegalArgumentException(
230                      LocalizedFormats.INSUFFICIENT_DIMENSION, length, 2);
231            } else {
232                double xMean = mean.evaluate(xArray);
233                double yMean = mean.evaluate(yArray);
234                for (int i = 0; i < length; i++) {
235                    double xDev = xArray[i] - xMean;
236                    double yDev = yArray[i] - yMean;
237                    result += (xDev * yDev - result) / (i + 1);
238                }
239            }
240            return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
241        }
242    
243        /**
244         * Computes the covariance between the two arrays, using the bias-corrected
245         * formula.
246         *
247         * <p>Array lengths must match and the common length must be at least 2.</p>
248         *
249         * @param xArray first data array
250         * @param yArray second data array
251         * @return returns the covariance for the two arrays
252         * @throws  IllegalArgumentException if the arrays lengths do not match or
253         * there is insufficient data
254         */
255        public double covariance(final double[] xArray, final double[] yArray)
256            throws IllegalArgumentException {
257            return covariance(xArray, yArray, true);
258        }
259    
260        /**
261         * Throws IllegalArgumentException of the matrix does not have at least
262         * two columns and two rows
263         * @param matrix matrix to check
264         */
265        private void checkSufficientData(final RealMatrix matrix) {
266            int nRows = matrix.getRowDimension();
267            int nCols = matrix.getColumnDimension();
268            if (nRows < 2 || nCols < 2) {
269                throw MathRuntimeException.createIllegalArgumentException(
270                        LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
271                        nRows, nCols);
272            }
273        }
274    }