Note

This feature is supported in Hivemall v0.3-beta2 or later.

UDF preparation

-- Session setup: load the Hivemall jar, run its definition script, and select the working database.
-- Register the jar (bundled with its dependencies) with the current Hive session.
add jar ./tmp/hivemall-with-dependencies.jar;
-- Run the statements in define-all.hive; presumably this registers the Hivemall functions used later (verify against the script).
source ./tmp/define-all.hive;

-- Make news20 the current database so unqualified table names resolve there.
use news20;

#[AdaGradRDA]

Note

The current AdaGradRDA implementation can only be applied to classification, not to regression, because it uses hinge loss as the loss function.

model building

-- Build the AdaGradRDA model table from the news20b_train_x3 training set.
-- "if exists" keeps the script re-runnable: the drop is a no-op when the table
-- is absent instead of depending on hive.exec.drop.ignorenonexistent.
drop table if exists news20b_adagrad_rda_model1;

-- train_adagrad_rda emits (feature, weight) rows; rows sharing a feature are
-- collapsed into one weight per feature with voted_avg.
-- addBias(features) presumably appends a bias term to each feature vector (confirm in the Hivemall docs).
create table news20b_adagrad_rda_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select
    train_adagrad_rda(addBias(features), label) as (feature, weight)
  from
    news20b_train_x3
) t
group by
  feature;

prediction

-- Per-row prediction for the test set using the AdaGradRDA model.
-- total_weight is the sum of weight * value over the row's features; the label
-- is 1 when that sum is positive and -1 otherwise (including a NULL sum, which
-- occurs when none of the row's features matched the model).
create or replace view news20b_adagrad_rda_predict1 as
select
  te.rowid,
  sum(mdl.weight * te.value) as total_weight,
  case
    when sum(mdl.weight * te.value) > 0.0 then 1
    else -1
  end as label
from
  news20b_test_exploded te
  left outer join news20b_adagrad_rda_model1 mdl
    on (te.feature = mdl.feature)
group by
  te.rowid;

evaluation

-- Pair each test row's actual label with the label predicted by the AdaGradRDA model.
create or replace view news20b_adagrad_rda_submit1 as
select
  tst.label as actual,
  pred.label as predicted
from
  news20b_test tst
  join news20b_adagrad_rda_predict1 pred
    on (tst.rowid = pred.rowid);

-- Accuracy: number of rows whose labels match, divided by 4996
-- (presumably the row count of news20b_test; confirm before reusing on other data).
select
  count(1) / 4996
from
  news20b_adagrad_rda_submit1
where
  actual = predicted;

SCW1 0.9661729383506805

ADAGRAD+RDA 0.9677742193755005

#[AdaGrad]

Note that AdaGrad is better suited to regression problems because the current implementation only supports logistic loss.

model building

-- Build the AdaGrad model table from the news20b_train_x3 training set.
-- "if exists" keeps the script re-runnable: the drop is a no-op when the table
-- is absent instead of depending on hive.exec.drop.ignorenonexistent.
drop table if exists news20b_adagrad_model1;

-- adagrad emits (feature, weight) rows; rows sharing a feature are collapsed
-- into one weight per feature with voted_avg.
-- convert_label maps the -1/+1 labels to the 0/1 labels that adagrad takes.
-- addBias(features) presumably appends a bias term to each feature vector (confirm in the Hivemall docs).
create table news20b_adagrad_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select
    adagrad(addBias(features), convert_label(label)) as (feature, weight)
  from
    news20b_train_x3
) t
group by
  feature;

Caution

adagrad takes 0/1 as the label value, so convert_label(label) is used to convert the label value from -1/+1 to 0/1.

prediction

-- Per-row prediction for the test set using the AdaGrad model.
-- The summed weight * value of a row's features is passed through sigmoid; the
-- label is 1 when the result is at least 0.5 and -1 otherwise (including a NULL
-- result, which occurs when none of the row's features matched the model).
create or replace view news20b_adagrad_predict1 as
select
  te.rowid,
  case
    when sigmoid(sum(mdl.weight * te.value)) >= 0.5 then 1
    else -1
  end as label
from
  news20b_test_exploded te
  left outer join news20b_adagrad_model1 mdl
    on (te.feature = mdl.feature)
group by
  te.rowid;

evaluation

-- Pair each test row's actual label with the label predicted by the AdaGrad model.
create or replace view news20b_adagrad_submit1 as
select
  tst.label as actual,
  pred.label as predicted
from
  news20b_test tst
  join news20b_adagrad_predict1 pred
    on (tst.rowid = pred.rowid);

-- Accuracy: number of rows whose labels match, divided by 4996
-- (presumably the row count of news20b_test; confirm before reusing on other data).
select
  count(1) / 4996
from
  news20b_adagrad_submit1
where
  actual = predicted;

0.9549639711769415 (adagrad)

#[AdaDelta]

Caution

AdaDelta can only be applied to regression problems because the current implementation only supports logistic loss.

model building

-- Build the AdaDelta model table from the news20b_train_x3 training set.
-- "if exists" keeps the script re-runnable: the drop is a no-op when the table
-- is absent instead of depending on hive.exec.drop.ignorenonexistent.
drop table if exists news20b_adadelta_model1;

-- adadelta emits (feature, weight) rows; rows sharing a feature are collapsed
-- into one weight per feature with voted_avg.
-- convert_label maps the -1/+1 labels to 0/1; presumably adadelta expects 0/1
-- labels (confirm in the Hivemall docs).
-- addBias(features) presumably appends a bias term to each feature vector (confirm in the Hivemall docs).
create table news20b_adadelta_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select
    adadelta(addBias(features), convert_label(label)) as (feature, weight)
  from
    news20b_train_x3
) t
group by
  feature;

prediction

-- Per-row prediction for the test set using the AdaDelta model.
-- The summed weight * value of a row's features is passed through sigmoid; the
-- label is 1 when the result is at least 0.5 and -1 otherwise (including a NULL
-- result, which occurs when none of the row's features matched the model).
create or replace view news20b_adadelta_predict1 as
select
  te.rowid,
  case
    when sigmoid(sum(mdl.weight * te.value)) >= 0.5 then 1
    else -1
  end as label
from
  news20b_test_exploded te
  left outer join news20b_adadelta_model1 mdl
    on (te.feature = mdl.feature)
group by
  te.rowid;

evaluation

-- Pair each test row's actual label with the label predicted by the AdaDelta model.
create or replace view news20b_adadelta_submit1 as
select
  tst.label as actual,
  pred.label as predicted
from
  news20b_test tst
  join news20b_adadelta_predict1 pred
    on (tst.rowid = pred.rowid);

-- Accuracy: number of rows whose labels match, divided by 4996
-- (presumably the row count of news20b_test; confirm before reusing on other data).
select
  count(1) / 4996
from
  news20b_adadelta_submit1
where
  actual = predicted;

AdaDelta often performs better than AdaGrad, although on this dataset AdaGrad scores slightly higher:

0.9549639711769415 (adagrad)

0.9545636509207366 (adadelta)