| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file wilcoxon_signed_rank_test.cpp |
| * |
| * @brief Wilcoxon-Signed-Rank-test functions |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| #include <dbconnector/dbconnector.hpp> |
| #include <modules/prob/boost.hpp> |
| #include <modules/shared/HandleTraits.hpp> |
| #include <utils/Math.hpp> |
| |
| #include "wilcoxon_signed_rank_test.hpp" |
| |
| namespace madlib { |
| |
| namespace modules { |
| |
| namespace stats { |
| |
| /** |
| * @brief Transition state for Wilcoxon signed-rank functions |
| * |
| * Note: We assume that the DOUBLE PRECISION array is initialized by the |
| * database with length 9, and all elemenets are 0. |
| */ |
| template <class Handle> |
| class WSRTestTransitionState { |
| public: |
| WSRTestTransitionState(const AnyType &inArray) |
| : mStorage(inArray.getAs<Handle>()), |
| num(&mStorage[0], 2), |
| numTies(&mStorage[2], 2), |
| rankSum(&mStorage[4], 2), |
| lastAbs(&mStorage[6]), |
| lastAbsUpperBound(&mStorage[7]), |
| reduceVariance(&mStorage[8]) { } |
| |
| inline operator AnyType() const { |
| return mStorage; |
| } |
| |
| private: |
| Handle mStorage; |
| |
| public: |
| typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap num; |
| typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap numTies; |
| typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap rankSum; |
| typename HandleTraits<Handle>::ReferenceToDouble lastAbs; |
| typename HandleTraits<Handle>::ReferenceToDouble lastAbsUpperBound; |
| typename HandleTraits<Handle>::ReferenceToDouble reduceVariance; |
| }; |
| |
| |
| /** |
| * @brief Perform the Wilcoxon-Signed-Rank-test transition step |
| * |
| * Index 0 always refers to the positive values and index 1 refers to |
| * the negative values. |
| */ |
| AnyType |
| wsr_test_transition::run(AnyType &args) { |
| WSRTestTransitionState<MutableArrayHandle<double> > state = args[0]; |
| double value = args[1].getAs<double>(); |
| double precision = args.numFields() >= 3 |
| ? args[2].getAs<double>() |
| : -1; |
| |
| if (!std::isfinite(precision)) |
| throw std::invalid_argument((boost::format( |
| "Precision must be finite, but got %1%.") % precision).str()); |
| else if (precision < 0) |
| precision = value * std::numeric_limits<double>::epsilon(); |
| |
| // Ignore values of zero. |
| if (value == 0) |
| return state; |
| |
| double absValue = std::fabs(value); |
| int sample = value > 0 ? 0 : 1; |
| |
| if (state.num.sum() > 0) { |
| if (absValue < state.lastAbs) |
| throw std::invalid_argument("Must be used as an ordered aggregate, " |
| "in ascending order of the absolute value of the first " |
| "argument."); |
| else if (absValue - precision <= state.lastAbsUpperBound) { |
| for (int i = 0; i <= 1; i++) |
| state.rankSum(i) += state.numTies(i) * 0.5; |
| |
| // Hollander, Wolfe ("Nonparametric statistical methods", 2nd ed., |
| // p. 38, 1999) suggest the following: |
| // |
| // For each *group*, add (t^3 - t)/48 to state.reduceVariance where |
| // t denotes the number of ties in that group. |
| // Note that t^3 - t == 0 if t = 0 or t = 1. So it is sufficient |
| // to modify state.reduceVariance only in the current case of the |
| // if-else block. |
| // |
| // Instead of modifying state.reduceVariance only once per group, |
| // we modify it for each repeated occurrence. In detail, we add |
| // [ ((t+1)^3 - (t+1)) - (t^3 - t) ]/48 to state.reduceVariance. |
| // This is equal to t * (t + 1) / 16. |
| double t = state.numTies.sum(); |
| state.reduceVariance += 1./16. * t * (t + 1); |
| } else { |
| // We have here: |
| // state.lastAbs <= state.lastAbsUpperBound < absValue - precision |
| // <= absValue |
| state.numTies.setZero(); |
| } |
| } |
| |
| state.num(sample)++; |
| state.rankSum(sample) += (2. * state.num.sum() - state.numTies.sum()) / 2.; |
| state.numTies(sample)++; |
| state.lastAbs = absValue; |
| if (absValue + precision > state.lastAbsUpperBound) |
| state.lastAbsUpperBound = absValue + precision; |
| |
| return state; |
| } |
| |
| AnyType |
| wsr_test_final::run(AnyType &args) { |
| using boost::math::complement; |
| |
| WSRTestTransitionState<ArrayHandle<double> > state = args[0]; |
| |
| double n_n1 = state.num.sum() * (state.num.sum() + 1); |
| double statistic = state.rankSum.minCoeff(); |
| double z_statistic = (state.rankSum(0) - n_n1 / 4.) |
| / std::sqrt(n_n1 * (2 * state.num.sum() + 1.) / 24. |
| - state.reduceVariance); |
| |
| AnyType tuple; |
| tuple |
| << statistic |
| << static_cast<double>(state.rankSum(0)) |
| << static_cast<double>(state.rankSum(1)) |
| << static_cast<int64_t>(state.num.sum()) |
| << z_statistic |
| << prob::cdf(complement(prob::normal(), z_statistic)) |
| << 2. * prob::cdf(complement(prob::normal(), std::fabs(z_statistic))); |
| return tuple; |
| } |
| |
| } // namespace stats |
| |
| } // namespace modules |
| |
| } // namespace madlib |