blob: 1946a820f7e04ff6eac499db0b02f4d200c3a8ce [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# COMMON FUNCTIONS TO SOLVE BINOMIAL DISTRIBUTION PROBLEMS
# WORK OVER VECTORS (IN PARALLEL) TO SAVE COMPUTATION TIME
# Computes binomial parameter p (the biased-coin probability)
# such that Prob [Binom(n, p) <= m] = alpha
# Use it for "exact" confidence intervals over p given m, n:
# For example, for 95%-confidence intervals, use [p1, p2]
# such that Prob [Binom(n, p1) <= m-1] = 0.975
# and Prob [Binom(n, p2) <= m ] = 0.025
binomQuantile =
function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] alpha_vector)
return (Matrix[double] p_vector)
{
num_rows = nrow (n_vector);
p_min = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
alpha_p_min = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
p_max = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
alpha_p_max = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
for (i in 1:27) { # Uses "division by half" method to solve equations
p_new = (p_min + p_max) / 2.0;
[alpha_p_new] = binomProb (n_vector, m_vector, p_new);
move_new_to_max = (alpha_p_new < alpha_vector);
p_max = (1 - move_new_to_max) * p_max + move_new_to_max * p_new;
p_min = (1 - move_new_to_max) * p_new + move_new_to_max * p_min;
alpha_p_max = (1 - move_new_to_max) * alpha_p_max + move_new_to_max * alpha_p_new;
alpha_p_min = (1 - move_new_to_max) * alpha_p_new + move_new_to_max * alpha_p_min;
}
p_vector = (p_min + p_max) / 2.0;
}
# Computes the cumulative distribution fuction of the binomial distribution,
# that is, Prob [Binom(n, p) <= m], using the incomplete Beta function
# approximated via a continued fraction, see "Handbook of Mathematical Functions"
# edited by M. Abramowitz and I.A. Stegun, U.S. Nat-l Bureau of Standards,
# 10th print (Dec 1972), Sec. 26.5.8-26.5.9, p. 944
binomProb =
function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] p_vector)
return (Matrix[double] result)
{
num_rows = nrow (n_vector);
num_iterations = 100;
mean_vector = p_vector * n_vector;
is_opposite = (mean_vector < m_vector);
l_vector = is_opposite * (n_vector - (m_vector + 1)) + (1 - is_opposite) * m_vector;
q_vector = is_opposite * (1.0 - p_vector) + (1 - is_opposite) * p_vector;
n_minus_l_vector = n_vector - l_vector;
is_result_zero1 = (l_vector < - 0.0000000001);
is_result_one1 = (n_minus_l_vector < 0.0000000001);
is_result_zero2 = (q_vector > 0.9999999999);
is_result_one2 = (q_vector < 0.0000000001);
is_result_zero = is_result_zero1 + (1 - is_result_zero1) * is_result_zero2 * (1 - is_result_one1);
is_result_one = (is_result_one1 + (1 - is_result_one1) * is_result_one2) * (1 - is_result_zero);
result = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
result = result + is_result_one;
is_already_done = is_result_zero + is_result_one;
still_iterating = 1 - is_already_done;
n_vector = (1 - is_already_done) * n_vector + is_already_done * 2;
l_vector = (1 - is_already_done) * l_vector + is_already_done;
n_minus_l_vector = (1 - is_already_done) * n_minus_l_vector + is_already_done;
q_vector = (1 - is_already_done) * q_vector + is_already_done * 0.8;
numer_old = q_vector;
denom_old = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
numer = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
denom = 1.0 - q_vector;
is_i_even = 1;
for (i in 1:num_iterations) # The continued fraction iterations
{
is_i_even = 1 - is_i_even;
e_term = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
if (i > 1) {
if (is_i_even == 1) {
e_term = - (2 * n_minus_l_vector + (i - 2)) * (2 * l_vector - (i - 2));
}
if (is_i_even == 0) {
e_term = (i - 1) * (2 * n_vector + (i - 1));
}
e_term = e_term / (n_minus_l_vector + (i - 2)) / (n_minus_l_vector + (i - 1));
e_term = e_term * 0.25;
}
numer_new = still_iterating * (q_vector * numer + (1.0 - q_vector) * e_term * numer_old) + (1.0 - still_iterating);
denom_new = still_iterating * (q_vector * denom + (1.0 - q_vector) * e_term * denom_old) + (1.0 - still_iterating);
numer_old = still_iterating * (q_vector * numer) + (1.0 - still_iterating);
denom_old = still_iterating * (q_vector * denom) + (1.0 - still_iterating);
numer = numer_new;
denom = denom_new;
abs_denom = abs (denom);
denom_too_big = (abs_denom > 10000000000.0);
denom_too_small = (abs_denom < 0.0000000001);
denom_normal = 1.0 - denom_too_big - denom_too_small;
rescale_vector = denom_too_big * 0.0000000001 + denom_too_small * 10000000000.0 + denom_normal;
numer_old = numer_old * rescale_vector;
denom_old = denom_old * rescale_vector;
numer = numer * rescale_vector;
denom = denom * rescale_vector;
convergence_check_left = abs (numer * denom_old - numer_old * denom);
convergence_check_right = abs (numer * denom_old) * 0.000000001;
has_converged = (convergence_check_left <= convergence_check_right);
has_converged = still_iterating * has_converged;
still_iterating = still_iterating - has_converged;
result = result + has_converged * numer / denom;
}
result = result + still_iterating * numer / denom;
n_vector_not_already_done = (1 - is_already_done) * n_vector;
l_vector_not_already_done = (1 - is_already_done) * l_vector;
n_minus_l_vector_not_already_done = (1 - is_already_done) * n_minus_l_vector;
q_vector_not_already_done = (1 - is_already_done) * q_vector + is_already_done;
one_minus_q_vector_not_already_done = (1 - is_already_done) * (1.0 - q_vector) + is_already_done;
[n_logfact] = logFactorial (n_vector_not_already_done);
[l_logfact] = logFactorial (l_vector_not_already_done);
[n_minus_l_logfact] = logFactorial (n_minus_l_vector_not_already_done);
log_update_factor = n_logfact - l_logfact - n_minus_l_logfact + l_vector * log (q_vector_not_already_done)
+ n_minus_l_vector * log (one_minus_q_vector_not_already_done);
updated_result = result * (is_already_done + (1 - is_already_done) * exp (log_update_factor));
result = is_opposite + (1 - 2 * is_opposite) * updated_result;
}
# Computes the logarithm of the factorial of x >= 0 via the Gamma function
# From paper: C. Lanczos "A Precision Approximation of the Gamma Function",
# Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, pp. 86-96
logFactorial = function (Matrix[double] x) return (Matrix[double] logfact)
{
y = 1.000000000178;
y = y + 76.180091729406 / (x + 1);
y = y - 86.505320327112 / (x + 2);
y = y + 24.014098222230 / (x + 3);
y = y - 1.231739516140 / (x + 4);
y = y + 0.001208580030 / (x + 5);
y = y - 0.000005363820 / (x + 6);
logfact = log(y) + (x + 0.5) * log(x + 5.5) - (x + 5.5) + 0.91893853320467; # log(sqrt(2 * pi));
}