| ----------------------------------------------------------------------------- |
| -- Hivemall: Hive scalable Machine Learning Library |
| ----------------------------------------------------------------------------- |
| |
| -- macros available from hive 0.12.0 |
| -- see https://issues.apache.org/jira/browse/HIVE-2655 |
| |
| -------------------- |
| -- General Macros -- |
| -------------------- |
| |
| -- java_min(x, y): smaller of two values, computed by calling the static Java |
| -- method java.lang.Math.min through Hive's reflect() UDF. |
| -- NOTE(review): reflect() may hand the result back as a string rather than a |
| -- DOUBLE; confirm before relying on the numeric type of the result. |
| create temporary macro java_min(x DOUBLE, y DOUBLE) |
| reflect('java.lang.Math', 'min', x, y); |
| |
| -- max2(x, y): larger of two DOUBLE values. |
| -- x is returned only when x > y evaluates to true; in every other case |
| -- (x <= y, or the comparison is NULL because an argument is NULL) the |
| -- result is y. |
| create temporary macro max2(x DOUBLE, y DOUBLE) |
| case when x > y then x else y end; |
| |
| -- min2(x, y): smaller of two DOUBLE values. |
| -- x is returned only when x < y evaluates to true; in every other case |
| -- (x >= y, or the comparison is NULL because an argument is NULL) the |
| -- result is y. |
| create temporary macro min2(x DOUBLE, y DOUBLE) |
| case when x < y then x else y end; |
| |
| -------------------------- |
| -- Statistics functions -- |
| -------------------------- |
| |
| -- idf(df_t, n_docs): 1 + log10(n_docs / max2(1, df_t)). |
| -- Going by the parameter names, df_t is the document frequency of a term |
| -- and n_docs the total number of documents. |
| -- The denominator goes through max2(1, df_t), so a df_t below 1 is |
| -- replaced by 1 and the division never sees a zero. |
| create temporary macro idf(df_t DOUBLE, n_docs DOUBLE) |
| 1.0 + log(10, n_docs / max2(1, df_t)); |
| |
| -- tfidf(tf, df_t, n_docs): tf multiplied by idf(df_t, n_docs), i.e. |
| -- tf * (log10(n_docs / max2(1, df_t)) + 1). |
| -- The IDF factor is taken from the idf macro defined earlier in this file |
| -- so the formula is written in one place only. |
| create temporary macro tfidf(tf FLOAT, df_t DOUBLE, n_docs DOUBLE) |
| tf * idf(df_t, n_docs); |
| |
| -------------------------- |
| -- Evaluation functions -- |
| -------------------------- |
| |
| -- CAUTION: UDAF in macro is not yet supported in Hive |
| |
| -- Root Mean Squared Error |
| -- create temporary macro RMSE(predicted FLOAT, actual FLOAT) |
| -- sqrt(sum(pow(predicted - actual,2.0))/count(1)); |
| |
| -- Mean Squared Error |
| -- create temporary macro MSE(predicted FLOAT, actual FLOAT) |
| -- sum(pow(predicted - actual,2.0))/count(1); |
| |
| -- Mean Absolute Error |
| -- create temporary macro MAE(predicted FLOAT, actual FLOAT) |
| -- sum(abs(predicted - actual))/count(1); |
| |
| -- sum of squared errors |
| -- create temporary macro SSE(predicted FLOAT, actual FLOAT) |
| -- sum(pow(actual - predicted,2.0)); |
| |
| -- sum of squared total |
| -- create temporary macro SST(actual FLOAT, mean_actual FLOAT) |
| -- sum(pow(actual-mean_actual,2.0)); |
| |
| -- coefficient of determination (R^2) |
| -- create temporary macro R2(predicted FLOAT, actual FLOAT, mean_actual FLOAT) |
| -- 1 - (SSE(predicted, actual) / SST(actual, mean_actual)); |