001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.distribution;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.MathRuntimeException;
023    import org.apache.commons.math.exception.util.LocalizedFormats;
024    import org.apache.commons.math.util.MathUtils;
025    import org.apache.commons.math.util.FastMath;
026    
027    /**
028     * The default implementation of {@link HypergeometricDistribution}.
029     *
030     * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $
031     */
032    public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
033            implements HypergeometricDistribution, Serializable {
034    
035        /** Serializable version identifier */
036        private static final long serialVersionUID = -436928820673516179L;
037    
038        /** The number of successes in the population. */
039        private int numberOfSuccesses;
040    
041        /** The population size. */
042        private int populationSize;
043    
044        /** The sample size. */
045        private int sampleSize;
046    
047        /**
048         * Construct a new hypergeometric distribution with the given the population
049         * size, the number of successes in the population, and the sample size.
050         *
051         * @param populationSize the population size.
052         * @param numberOfSuccesses number of successes in the population.
053         * @param sampleSize the sample size.
054         */
055        public HypergeometricDistributionImpl(int populationSize,
056                int numberOfSuccesses, int sampleSize) {
057            super();
058            if (numberOfSuccesses > populationSize) {
059                throw MathRuntimeException
060                        .createIllegalArgumentException(
061                                LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
062                                numberOfSuccesses, populationSize);
063            }
064            if (sampleSize > populationSize) {
065                throw MathRuntimeException
066                        .createIllegalArgumentException(
067                                LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
068                                sampleSize, populationSize);
069            }
070    
071            setPopulationSizeInternal(populationSize);
072            setSampleSizeInternal(sampleSize);
073            setNumberOfSuccessesInternal(numberOfSuccesses);
074        }
075    
076        /**
077         * For this distribution, X, this method returns P(X ≤ x).
078         *
079         * @param x the value at which the PDF is evaluated.
080         * @return PDF for this distribution.
081         */
082        @Override
083        public double cumulativeProbability(int x) {
084            double ret;
085    
086            int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
087            if (x < domain[0]) {
088                ret = 0.0;
089            } else if (x >= domain[1]) {
090                ret = 1.0;
091            } else {
092                ret = innerCumulativeProbability(domain[0], x, 1, populationSize,
093                                                 numberOfSuccesses, sampleSize);
094            }
095    
096            return ret;
097        }
098    
099        /**
100         * Return the domain for the given hypergeometric distribution parameters.
101         *
102         * @param n the population size.
103         * @param m number of successes in the population.
104         * @param k the sample size.
105         * @return a two element array containing the lower and upper bounds of the
106         *         hypergeometric distribution.
107         */
108        private int[] getDomain(int n, int m, int k) {
109            return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
110        }
111    
112        /**
113         * Access the domain value lower bound, based on <code>p</code>, used to
114         * bracket a PDF root.
115         *
116         * @param p the desired probability for the critical value
117         * @return domain value lower bound, i.e. P(X &lt; <i>lower bound</i>) &lt;
118         *         <code>p</code>
119         */
120        @Override
121        protected int getDomainLowerBound(double p) {
122            return getLowerDomain(populationSize, numberOfSuccesses, sampleSize);
123        }
124    
125        /**
126         * Access the domain value upper bound, based on <code>p</code>, used to
127         * bracket a PDF root.
128         *
129         * @param p the desired probability for the critical value
130         * @return domain value upper bound, i.e. P(X &lt; <i>upper bound</i>) &gt;
131         *         <code>p</code>
132         */
133        @Override
134        protected int getDomainUpperBound(double p) {
135            return getUpperDomain(sampleSize, numberOfSuccesses);
136        }
137    
138        /**
139         * Return the lowest domain value for the given hypergeometric distribution
140         * parameters.
141         *
142         * @param n the population size.
143         * @param m number of successes in the population.
144         * @param k the sample size.
145         * @return the lowest domain value of the hypergeometric distribution.
146         */
147        private int getLowerDomain(int n, int m, int k) {
148            return FastMath.max(0, m - (n - k));
149        }
150    
151        /**
152         * Access the number of successes.
153         *
154         * @return the number of successes.
155         */
156        public int getNumberOfSuccesses() {
157            return numberOfSuccesses;
158        }
159    
160        /**
161         * Access the population size.
162         *
163         * @return the population size.
164         */
165        public int getPopulationSize() {
166            return populationSize;
167        }
168    
169        /**
170         * Access the sample size.
171         *
172         * @return the sample size.
173         */
174        public int getSampleSize() {
175            return sampleSize;
176        }
177    
178        /**
179         * Return the highest domain value for the given hypergeometric distribution
180         * parameters.
181         *
182         * @param m number of successes in the population.
183         * @param k the sample size.
184         * @return the highest domain value of the hypergeometric distribution.
185         */
186        private int getUpperDomain(int m, int k) {
187            return FastMath.min(k, m);
188        }
189    
190        /**
191         * For this distribution, X, this method returns P(X = x).
192         *
193         * @param x the value at which the PMF is evaluated.
194         * @return PMF for this distribution.
195         */
196        public double probability(int x) {
197            double ret;
198    
199            int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
200            if (x < domain[0] || x > domain[1]) {
201                ret = 0.0;
202            } else {
203                double p = (double) sampleSize / (double) populationSize;
204                double q = (double) (populationSize - sampleSize) / (double) populationSize;
205                double p1 = SaddlePointExpansion.logBinomialProbability(x,
206                        numberOfSuccesses, p, q);
207                double p2 =
208                    SaddlePointExpansion.logBinomialProbability(sampleSize - x,
209                        populationSize - numberOfSuccesses, p, q);
210                double p3 =
211                    SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
212                ret = FastMath.exp(p1 + p2 - p3);
213            }
214    
215            return ret;
216        }
217    
218        /**
219         * For the distribution, X, defined by the given hypergeometric distribution
220         * parameters, this method returns P(X = x).
221         *
222         * @param n the population size.
223         * @param m number of successes in the population.
224         * @param k the sample size.
225         * @param x the value at which the PMF is evaluated.
226         * @return PMF for the distribution.
227         */
228        private double probability(int n, int m, int k, int x) {
229            return FastMath.exp(MathUtils.binomialCoefficientLog(m, x) +
230                   MathUtils.binomialCoefficientLog(n - m, k - x) -
231                   MathUtils.binomialCoefficientLog(n, k));
232        }
233    
234        /**
235         * Modify the number of successes.
236         *
237         * @param num the new number of successes.
238         * @throws IllegalArgumentException if <code>num</code> is negative.
239         * @deprecated as of 2.1 (class will become immutable in 3.0)
240         */
241        @Deprecated
242        public void setNumberOfSuccesses(int num) {
243            setNumberOfSuccessesInternal(num);
244        }
245    
246        /**
247         * Modify the number of successes.
248         *
249         * @param num the new number of successes.
250         * @throws IllegalArgumentException if <code>num</code> is negative.
251         */
252        private void setNumberOfSuccessesInternal(int num) {
253            if (num < 0) {
254                throw MathRuntimeException.createIllegalArgumentException(
255                        LocalizedFormats.NEGATIVE_NUMBER_OF_SUCCESSES, num);
256            }
257            numberOfSuccesses = num;
258        }
259    
260        /**
261         * Modify the population size.
262         *
263         * @param size the new population size.
264         * @throws IllegalArgumentException if <code>size</code> is not positive.
265         * @deprecated as of 2.1 (class will become immutable in 3.0)
266         */
267        @Deprecated
268        public void setPopulationSize(int size) {
269            setPopulationSizeInternal(size);
270        }
271    
272        /**
273         * Modify the population size.
274         *
275         * @param size the new population size.
276         * @throws IllegalArgumentException if <code>size</code> is not positive.
277         */
278        private void setPopulationSizeInternal(int size) {
279            if (size <= 0) {
280                throw MathRuntimeException.createIllegalArgumentException(
281                        LocalizedFormats.NOT_POSITIVE_POPULATION_SIZE, size);
282            }
283            populationSize = size;
284        }
285    
286        /**
287         * Modify the sample size.
288         *
289         * @param size the new sample size.
290         * @throws IllegalArgumentException if <code>size</code> is negative.
291         * @deprecated as of 2.1 (class will become immutable in 3.0)
292         */
293        @Deprecated
294        public void setSampleSize(int size) {
295            setSampleSizeInternal(size);
296        }
297        /**
298         * Modify the sample size.
299         *
300         * @param size the new sample size.
301         * @throws IllegalArgumentException if <code>size</code> is negative.
302         */
303        private void setSampleSizeInternal(int size) {
304            if (size < 0) {
305                throw MathRuntimeException.createIllegalArgumentException(
306                        LocalizedFormats.NOT_POSITIVE_SAMPLE_SIZE, size);
307            }
308            sampleSize = size;
309        }
310    
311        /**
312         * For this distribution, X, this method returns P(X &ge; x).
313         *
314         * @param x the value at which the CDF is evaluated.
315         * @return upper tail CDF for this distribution.
316         * @since 1.1
317         */
318        public double upperCumulativeProbability(int x) {
319            double ret;
320    
321            final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
322            if (x < domain[0]) {
323                ret = 1.0;
324            } else if (x > domain[1]) {
325                ret = 0.0;
326            } else {
327                ret = innerCumulativeProbability(domain[1], x, -1, populationSize, numberOfSuccesses, sampleSize);
328            }
329    
330            return ret;
331        }
332    
333        /**
334         * For this distribution, X, this method returns P(x0 &le; X &le; x1). This
335         * probability is computed by summing the point probabilities for the values
336         * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
337         *
338         * @param x0 the inclusive, lower bound
339         * @param x1 the inclusive, upper bound
340         * @param dx the direction of summation. 1 indicates summing from x0 to x1.
341         *            0 indicates summing from x1 to x0.
342         * @param n the population size.
343         * @param m number of successes in the population.
344         * @param k the sample size.
345         * @return P(x0 &le; X &le; x1).
346         */
347        private double innerCumulativeProbability(int x0, int x1, int dx, int n,
348                int m, int k) {
349            double ret = probability(n, m, k, x0);
350            while (x0 != x1) {
351                x0 += dx;
352                ret += probability(n, m, k, x0);
353            }
354            return ret;
355        }
356    
357        /**
358         * Returns the lower bound for the support for the distribution.
359         *
360         * For population size <code>N</code>,
361         * number of successes <code>m</code>, and
362         * sample size <code>n</code>,
363         * the lower bound of the support is
364         * <code>max(0, n + m - N)</code>
365         *
366         * @return lower bound of the support
367         * @since 2.2
368         */
369        public int getSupportLowerBound() {
370            return FastMath.max(0,
371                    getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
372        }
373    
374        /**
375         * Returns the upper bound for the support of the distribution.
376         *
377         * For number of successes <code>m</code> and
378         * sample size <code>n</code>,
379         * the upper bound of the support is
380         * <code>min(m, n)</code>
381         *
382         * @return upper bound of the support
383         * @since 2.2
384         */
385        public int getSupportUpperBound() {
386            return FastMath.min(getNumberOfSuccesses(), getSampleSize());
387        }
388    
389        /**
390         * Returns the mean.
391         *
392         * For population size <code>N</code>,
393         * number of successes <code>m</code>, and
394         * sample size <code>n</code>, the mean is
395         * <code>n * m / N</code>
396         *
397         * @return the mean
398         * @since 2.2
399         */
400        protected double getNumericalMean() {
401            return (double)(getSampleSize() * getNumberOfSuccesses()) / (double)getPopulationSize();
402        }
403    
404        /**
405         * Returns the variance.
406         *
407         * For population size <code>N</code>,
408         * number of successes <code>m</code>, and
409         * sample size <code>n</code>, the variance is
410         * <code>[ n * m * (N - n) * (N - m) ] / [ N^2 * (N - 1) ]</code>
411         *
412         * @return the variance
413         * @since 2.2
414         */
415        public double getNumericalVariance() {
416            final double N = getPopulationSize();
417            final double m = getNumberOfSuccesses();
418            final double n = getSampleSize();
419            return ( n * m * (N - n) * (N - m) ) / ( (N*N * (N - 1)) );
420        }
421    }