blob: ff36a44efc72c8dfc89e541a54baaa981c5a56d1 [file] [log] [blame]
-----------------------------------------------------------------------------
-- Hivemall: Hive scalable Machine Learning Library
-----------------------------------------------------------------------------
-- macros available from hive 0.12.0
-- see https://issues.apache.org/jira/browse/HIVE-2655
--------------------
-- General Macros --
--------------------
-- java_min(x, y): delegates to the static Java method java.lang.Math.min
-- through Hive's reflect() UDF, passing x and y as the two arguments.
create temporary macro java_min(
  x DOUBLE,
  y DOUBLE
)
  reflect("java.lang.Math", "min", x, y);
-- max2(x, y): yields x when x > y holds; in every other case (including a
-- NULL comparison result) yields y.
create temporary macro max2(x DOUBLE, y DOUBLE)
  case when x > y then x else y end;
-- min2(x, y): yields x when x < y holds; in every other case (including a
-- NULL comparison result) yields y.
create temporary macro min2(x DOUBLE, y DOUBLE)
  case when x < y then x else y end;
--------------------------
-- Statistics functions --
--------------------------
-- idf(df_t, n_docs): 1.0 plus the base-10 logarithm of n_docs divided by
-- df_t. The divisor goes through max2(1, df_t), which substitutes 1 when
-- df_t is below 1.
create temporary macro idf(df_t DOUBLE, n_docs DOUBLE)
  1.0 + log(10, n_docs / max2(1, df_t));
-- tfidf(tf, df_t, n_docs): tf multiplied by
-- (1.0 + base-10 log of n_docs / max2(1, df_t)), i.e. the same expression
-- that the idf macro evaluates, written out inline here.
create temporary macro tfidf(tf FLOAT, df_t DOUBLE, n_docs DOUBLE)
  (1.0 + log(10, n_docs / max2(1, df_t))) * tf;
--------------------------
-- Evaluation functions --
--------------------------
-- CAUTION: UDAFs in macros are not yet supported in Hive
-- Root Mean Squared Error
-- create temporary macro RMSE(predicted FLOAT, actual FLOAT)
-- sqrt(sum(pow(predicted - actual,2.0))/count(1));
-- Mean Squared Error
-- create temporary macro MSE(predicted FLOAT, actual FLOAT)
-- sum(pow(predicted - actual,2.0))/count(1);
-- Mean Absolute Error
-- create temporary macro MAE(predicted FLOAT, actual FLOAT)
-- sum(abs(predicted - actual))/count(1);
-- sum of squared errors
-- create temporary macro SSE(predicted FLOAT, actual FLOAT)
-- sum(pow(actual - predicted,2.0));
-- sum of squared total
-- create temporary macro SST(actual FLOAT, mean_actual FLOAT)
-- sum(pow(actual-mean_actual,2.0));
-- coefficient of determination (R^2)
-- create temporary macro R2(predicted FLOAT, actual FLOAT, mean_actual FLOAT)
-- 1 - (SSE(predicted, actual) / SST(actual, mean_actual));