Note
This feature is supported in Hivemall
v0.3-beta2
or later.
-- Session setup: register the Hivemall jar, run the define-all.hive script, and switch to the news20 database.
add jar ./tmp/hivemall-with-dependencies.jar;
source ./tmp/define-all.hive;
use news20;
#[AdaGradRDA]
Note
The current AdaGradRDA implementation can only be applied to classification, not to regression, because it uses hinge loss for the loss function.
-- Build the AdaGradRDA model table: train on news20b_train_x3 and keep one aggregated weight per feature.
-- IF EXISTS keeps the script re-runnable when the table has not been created yet
-- (a bare DROP TABLE fails in that case if hive.exec.drop.ignorenonexistent is false).
DROP TABLE IF EXISTS news20b_adagrad_rda_model1;

CREATE TABLE news20b_adagrad_rda_model1 AS
SELECT
    feature,
    -- combine the weights emitted for the same feature into a single value
    voted_avg(weight) AS weight
FROM (
    -- the trainer emits (feature, weight) rows
    SELECT
        train_adagrad_rda(addBias(features), label) AS (feature, weight)
    FROM news20b_train_x3
) t
GROUP BY feature;
-- Score every test row with the AdaGradRDA model: total_weight is the sum of weight * value over the
-- features of the row, and label is 1 when that sum is positive, otherwise -1.
-- The LEFT OUTER JOIN keeps test features that have no entry in the model table.
CREATE OR REPLACE VIEW news20b_adagrad_rda_predict1 AS
SELECT
    ex.rowid,
    SUM(mdl.weight * ex.value) AS total_weight,
    CASE
        WHEN SUM(mdl.weight * ex.value) > 0.0 THEN 1
        ELSE -1
    END AS label
FROM news20b_test_exploded ex
LEFT OUTER JOIN news20b_adagrad_rda_model1 mdl
    ON (ex.feature = mdl.feature)
GROUP BY ex.rowid;
-- Pair the label stored in news20b_test with the AdaGradRDA prediction for the same rowid.
CREATE OR REPLACE VIEW news20b_adagrad_rda_submit1 AS
SELECT
    truth.label AS actual,
    pred.label AS predicted
FROM news20b_test truth
JOIN news20b_adagrad_rda_predict1 pred
    ON (truth.rowid = pred.rowid);
-- Accuracy of AdaGradRDA: number of rows where the prediction matches, divided by 4996.
-- NOTE(review): 4996 is hard-coded and is presumably the row count of news20b_test - confirm.
SELECT
    COUNT(1) / 4996
FROM news20b_adagrad_rda_submit1
WHERE actual = predicted;
SCW1 0.9661729383506805
ADAGRAD+RDA 0.9677742193755005
#[AdaGrad]
Note that AdaGrad is better suited for a regression problem because the current implementation only supports logistic loss.
-- Build the AdaGrad model table: train on news20b_train_x3 and keep one aggregated weight per feature.
-- IF EXISTS keeps the script re-runnable when the table has not been created yet
-- (a bare DROP TABLE fails in that case if hive.exec.drop.ignorenonexistent is false).
DROP TABLE IF EXISTS news20b_adagrad_model1;

CREATE TABLE news20b_adagrad_model1 AS
SELECT
    feature,
    -- combine the weights emitted for the same feature into a single value
    voted_avg(weight) AS weight
FROM (
    -- the label is passed through convert_label before training
    SELECT
        adagrad(addBias(features), convert_label(label)) AS (feature, weight)
    FROM news20b_train_x3
) t
GROUP BY feature;
Caution
adagrad
takes 0/1 for a label value and convert_label(label)
converts a label value from -1/+1 to 0/1.
-- Predict a label per test row with the AdaGrad model: 1 when the sigmoid of the summed
-- weight * value is at least 0.5, otherwise -1.
-- The LEFT OUTER JOIN keeps test features that have no entry in the model table.
CREATE OR REPLACE VIEW news20b_adagrad_predict1 AS
SELECT
    ex.rowid,
    CASE
        WHEN sigmoid(SUM(mdl.weight * ex.value)) >= 0.5 THEN 1
        ELSE -1
    END AS label
FROM news20b_test_exploded ex
LEFT OUTER JOIN news20b_adagrad_model1 mdl
    ON (ex.feature = mdl.feature)
GROUP BY ex.rowid;
-- Pair the label stored in news20b_test with the AdaGrad prediction for the same rowid.
CREATE OR REPLACE VIEW news20b_adagrad_submit1 AS
SELECT
    truth.label AS actual,
    pred.label AS predicted
FROM news20b_test truth
JOIN news20b_adagrad_predict1 pred
    ON (truth.rowid = pred.rowid);
-- Accuracy of AdaGrad: number of rows where the prediction matches, divided by 4996.
-- NOTE(review): 4996 is hard-coded and is presumably the row count of news20b_test - confirm.
SELECT
    COUNT(1) / 4996
FROM news20b_adagrad_submit1
WHERE actual = predicted;
0.9549639711769415 (adagrad)
#[AdaDelta]
Caution
AdaDelta can only be applied to regression problems because the current implementation only supports logistic loss.
-- Build the AdaDelta model table: train on news20b_train_x3 and keep one aggregated weight per feature.
-- IF EXISTS keeps the script re-runnable when the table has not been created yet
-- (a bare DROP TABLE fails in that case if hive.exec.drop.ignorenonexistent is false).
DROP TABLE IF EXISTS news20b_adadelta_model1;

CREATE TABLE news20b_adadelta_model1 AS
SELECT
    feature,
    -- combine the weights emitted for the same feature into a single value
    voted_avg(weight) AS weight
FROM (
    -- the label is passed through convert_label before training
    SELECT
        adadelta(addBias(features), convert_label(label)) AS (feature, weight)
    FROM news20b_train_x3
) t
GROUP BY feature;
-- Predict a label per test row with the AdaDelta model: 1 when the sigmoid of the summed
-- weight * value is at least 0.5, otherwise -1.
-- The LEFT OUTER JOIN keeps test features that have no entry in the model table.
CREATE OR REPLACE VIEW news20b_adadelta_predict1 AS
SELECT
    ex.rowid,
    CASE
        WHEN sigmoid(SUM(mdl.weight * ex.value)) >= 0.5 THEN 1
        ELSE -1
    END AS label
FROM news20b_test_exploded ex
LEFT OUTER JOIN news20b_adadelta_model1 mdl
    ON (ex.feature = mdl.feature)
GROUP BY ex.rowid;
-- Pair the label stored in news20b_test with the AdaDelta prediction for the same rowid.
CREATE OR REPLACE VIEW news20b_adadelta_submit1 AS
SELECT
    truth.label AS actual,
    pred.label AS predicted
FROM news20b_test truth
JOIN news20b_adadelta_predict1 pred
    ON (truth.rowid = pred.rowid);
-- Accuracy of AdaDelta: number of rows where the prediction matches, divided by 4996.
-- NOTE(review): 4996 is hard-coded and is presumably the row count of news20b_test - confirm.
SELECT
    COUNT(1) / 4996
FROM news20b_adadelta_submit1
WHERE actual = predicted;
AdaDelta often performs better than AdaGrad.
0.9549639711769415 (adagrad)
0.9545636509207366 (adadelta)