| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # COMMON FUNCTIONS TO SOLVE BINOMIAL DISTRIBUTION PROBLEMS |
| # WORK OVER VECTORS (IN PARALLEL) TO SAVE COMPUTATION TIME |
| |
| # Computes binomial parameter p (the biased-coin probability) |
| # such that Prob [Binom(n, p) <= m] = alpha |
| # Use it for "exact" confidence intervals over p given m, n: |
| # For example, for 95%-confidence intervals, use [p1, p2] |
| # such that Prob [Binom(n, p1) <= m-1] = 0.975 |
| # and Prob [Binom(n, p2) <= m ] = 0.025 |
| binomQuantile = |
| function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] alpha_vector) |
| return (Matrix[double] p_vector) |
| { |
| num_rows = nrow (n_vector); |
| p_min = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0); |
| alpha_p_min = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0); |
| p_max = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0); |
| alpha_p_max = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0); |
| |
| for (i in 1:27) { # Uses "division by half" method to solve equations |
| p_new = (p_min + p_max) / 2.0; |
| [alpha_p_new] = binomProb (n_vector, m_vector, p_new); |
| move_new_to_max = (alpha_p_new < alpha_vector); |
| p_max = (1 - move_new_to_max) * p_max + move_new_to_max * p_new; |
| p_min = (1 - move_new_to_max) * p_new + move_new_to_max * p_min; |
| alpha_p_max = (1 - move_new_to_max) * alpha_p_max + move_new_to_max * alpha_p_new; |
| alpha_p_min = (1 - move_new_to_max) * alpha_p_new + move_new_to_max * alpha_p_min; |
| } |
| p_vector = (p_min + p_max) / 2.0; |
| } |
| |
| |
| # Computes the cumulative distribution fuction of the binomial distribution, |
| # that is, Prob [Binom(n, p) <= m], using the incomplete Beta function |
| # approximated via a continued fraction, see "Handbook of Mathematical Functions" |
| # edited by M. Abramowitz and I.A. Stegun, U.S. Nat-l Bureau of Standards, |
| # 10th print (Dec 1972), Sec. 26.5.8-26.5.9, p. 944 |
| binomProb = |
| function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] p_vector) |
| return (Matrix[double] result) |
| { |
| num_rows = nrow (n_vector); |
| num_iterations = 100; |
| |
| mean_vector = p_vector * n_vector; |
| is_opposite = (mean_vector < m_vector); |
| l_vector = is_opposite * (n_vector - (m_vector + 1)) + (1 - is_opposite) * m_vector; |
| q_vector = is_opposite * (1.0 - p_vector) + (1 - is_opposite) * p_vector; |
| n_minus_l_vector = n_vector - l_vector; |
| |
| is_result_zero1 = (l_vector < - 0.0000000001); |
| is_result_one1 = (n_minus_l_vector < 0.0000000001); |
| is_result_zero2 = (q_vector > 0.9999999999); |
| is_result_one2 = (q_vector < 0.0000000001); |
| |
| is_result_zero = is_result_zero1 + (1 - is_result_zero1) * is_result_zero2 * (1 - is_result_one1); |
| is_result_one = (is_result_one1 + (1 - is_result_one1) * is_result_one2) * (1 - is_result_zero); |
| |
| result = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0); |
| result = result + is_result_one; |
| is_already_done = is_result_zero + is_result_one; |
| still_iterating = 1 - is_already_done; |
| |
| n_vector = (1 - is_already_done) * n_vector + is_already_done * 2; |
| l_vector = (1 - is_already_done) * l_vector + is_already_done; |
| n_minus_l_vector = (1 - is_already_done) * n_minus_l_vector + is_already_done; |
| q_vector = (1 - is_already_done) * q_vector + is_already_done * 0.8; |
| |
| numer_old = q_vector; |
| denom_old = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0); |
| numer = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0); |
| denom = 1.0 - q_vector; |
| |
| is_i_even = 1; |
| |
| for (i in 1:num_iterations) # The continued fraction iterations |
| { |
| is_i_even = 1 - is_i_even; |
| e_term = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0); |
| if (i > 1) { |
| if (is_i_even == 1) { |
| e_term = - (2 * n_minus_l_vector + (i - 2)) * (2 * l_vector - (i - 2)); |
| } |
| if (is_i_even == 0) { |
| e_term = (i - 1) * (2 * n_vector + (i - 1)); |
| } |
| e_term = e_term / (n_minus_l_vector + (i - 2)) / (n_minus_l_vector + (i - 1)); |
| e_term = e_term * 0.25; |
| } |
| numer_new = still_iterating * (q_vector * numer + (1.0 - q_vector) * e_term * numer_old) + (1.0 - still_iterating); |
| denom_new = still_iterating * (q_vector * denom + (1.0 - q_vector) * e_term * denom_old) + (1.0 - still_iterating); |
| numer_old = still_iterating * (q_vector * numer) + (1.0 - still_iterating); |
| denom_old = still_iterating * (q_vector * denom) + (1.0 - still_iterating); |
| numer = numer_new; |
| denom = denom_new; |
| |
| abs_denom = abs (denom); |
| denom_too_big = (abs_denom > 10000000000.0); |
| denom_too_small = (abs_denom < 0.0000000001); |
| denom_normal = 1.0 - denom_too_big - denom_too_small; |
| rescale_vector = denom_too_big * 0.0000000001 + denom_too_small * 10000000000.0 + denom_normal; |
| numer_old = numer_old * rescale_vector; |
| denom_old = denom_old * rescale_vector; |
| numer = numer * rescale_vector; |
| denom = denom * rescale_vector; |
| |
| convergence_check_left = abs (numer * denom_old - numer_old * denom); |
| convergence_check_right = abs (numer * denom_old) * 0.000000001; |
| has_converged = (convergence_check_left <= convergence_check_right); |
| has_converged = still_iterating * has_converged; |
| still_iterating = still_iterating - has_converged; |
| result = result + has_converged * numer / denom; |
| } |
| |
| result = result + still_iterating * numer / denom; |
| |
| n_vector_not_already_done = (1 - is_already_done) * n_vector; |
| l_vector_not_already_done = (1 - is_already_done) * l_vector; |
| n_minus_l_vector_not_already_done = (1 - is_already_done) * n_minus_l_vector; |
| q_vector_not_already_done = (1 - is_already_done) * q_vector + is_already_done; |
| one_minus_q_vector_not_already_done = (1 - is_already_done) * (1.0 - q_vector) + is_already_done; |
| |
| [n_logfact] = logFactorial (n_vector_not_already_done); |
| [l_logfact] = logFactorial (l_vector_not_already_done); |
| [n_minus_l_logfact] = logFactorial (n_minus_l_vector_not_already_done); |
| |
| log_update_factor = n_logfact - l_logfact - n_minus_l_logfact + l_vector * log (q_vector_not_already_done) |
| + n_minus_l_vector * log (one_minus_q_vector_not_already_done); |
| updated_result = result * (is_already_done + (1 - is_already_done) * exp (log_update_factor)); |
| result = is_opposite + (1 - 2 * is_opposite) * updated_result; |
| } |
| |
| |
| # Computes the logarithm of the factorial of x >= 0 via the Gamma function |
| # From paper: C. Lanczos "A Precision Approximation of the Gamma Function", |
| # Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, pp. 86-96 |
| logFactorial = function (Matrix[double] x) return (Matrix[double] logfact) |
| { |
| y = 1.000000000178; |
| y = y + 76.180091729406 / (x + 1); |
| y = y - 86.505320327112 / (x + 2); |
| y = y + 24.014098222230 / (x + 3); |
| y = y - 1.231739516140 / (x + 4); |
| y = y + 0.001208580030 / (x + 5); |
| y = y - 0.000005363820 / (x + 6); |
| logfact = log(y) + (x + 0.5) * log(x + 5.5) - (x + 5.5) + 0.91893853320467; # log(sqrt(2 * pi)); |
| } |
| |
| |
| |