This tutorial shows how to apply the General Regressor to a regression problem on the E2006 dataset.
-- Build the model table e2006tfidf_generic_model from e2006tfidf_train_x3.
-- train_regressor emits (feature, weight) rows and the outer query averages
-- the weight per feature.
-- Options passed to train_regressor: squared loss, AdamHD optimizer,
-- no regularization, 20 iterations.
-- NOTE(review): the reducer count of 32 is a tuning choice from the original
-- script and should be confirmed against the cluster size.
set mapred.reduce.tasks=32;

-- if exists keeps the script re-runnable when the table was never created
drop table if exists e2006tfidf_generic_model;

create table e2006tfidf_generic_model as
select
  feature,
  avg(weight) as weight
from (
  select
    train_regressor(
      add_bias(features),
      target,
      '-loss squaredloss -opt AdamHD -reg No -iters 20'
    ) as (feature, weight)
  from
    e2006tfidf_train_x3
) t
group by
  feature;

-- reset to the default setting
-- (kept on separate lines so the comment cannot swallow the statement below)
set mapred.reduce.tasks=-1;
Caution
Regularization may not work well for regression problems. In that case, try providing
the `-reg No` option, as in the query above. Also, do not use `voted_avg()` for regression; `voted_avg()` is for classification.
-- Prediction view: one row per test rowid, where predicted is the sum of
-- (model weight * feature value) over the exploded test features.
-- The left outer join keeps test features that have no model weight. Their
-- product is NULL and is skipped by sum.
create or replace view e2006tfidf_generic_predict as
select
  te.rowid,
  sum(mdl.weight * te.value) as predicted
from
  e2006tfidf_test_exploded te
  left outer join e2006tfidf_generic_model mdl
    on (te.feature = mdl.feature)
group by
  te.rowid;
-- Evaluation: pair each test target with its prediction by rowid, then
-- report the error metrics over all paired rows.
with scored as (
  select
    truth.target as actual,
    pred.predicted as predicted
  from
    e2006tfidf_test truth
    join e2006tfidf_generic_predict pred
      on (truth.rowid = pred.rowid)
)
select
  rmse(predicted, actual) as RMSE,
  mse(predicted, actual) as MSE,
  mae(predicted, actual) as MAE,
  r2(predicted, actual) as R2
from
  scored;
| rmse | mse | mae | r2 |
|---|---|---|---|
| 0.37125069279938866 | 0.13782707690402607 | 0.2270351090214029 | 0.5232372408076887 |