Algorithm | Accuracy |
---|---|
PA2 | 0.8204357625845229 |
SCW1 | 0.8314550463310794 |
AROW | 0.8474830954169797 |
SCW2 | 0.8482344102178813 |
CW | 0.850488354620586 |
-- Reload the Hivemall jar for this session.
delete jar /home/myui/tmp/hivemall.jar;
add jar /home/myui/tmp/hivemall.jar;
-- Run define-all.hive (presumably registers the Hivemall functions - confirm).
source /home/myui/tmp/define-all.hive;
#[CW]
-- Train the multiclass CW model from news20mc_train_x3 and store one
-- aggregated weight per (label, feature) pair.
-- Version notes are kept on their own comment lines so that a "--" never
-- hides the remainder of the statement.
drop table news20mc_cw_model1;
create table news20mc_cw_model1 as
select
  label,
  cast(feature as int) as feature,
  -- [hivemall v0.1] voted_avg(weight) as weight
  -- [hivemall v0.2 or later]
  argmin_kld(weight, covar) as weight
from (
  select
    -- [hivemall v0.1] train_multiclass_cw(add_bias(features),label) as (label,feature,weight)
    -- [hivemall v0.2 or later]
    train_multiclass_cw(add_bias(features), label) as (label, feature, weight, covar)
  from
    news20mc_train_x3
) t
group by
  label, feature;
-- For every test row, sum weight * value per label against the CW model,
-- then keep the (score, label) pair returned by maxrow for that row.
create or replace view news20mc_cw_predict1 as
select
  rowid,
  best.col0 as score,
  best.col1 as label
from (
  select
    rowid,
    maxrow(score, label) as best
  from (
    select
      t.rowid,
      m.label,
      sum(m.weight * t.value) as score
    from
      news20mc_test_exploded t
      left outer join news20mc_cw_model1 m
        on (t.feature = m.feature)
    group by
      t.rowid, m.label
  ) label_scores
  group by
    rowid
) top_rows;
-- Pair each test row's label (actual) with the CW prediction for the same rowid.
create or replace view news20mc_cw_submit1 as
select
  truth.label as actual,
  pred.label as predicted
from
  news20mc_test truth
  join news20mc_cw_predict1 pred
    on (truth.rowid = pred.rowid);
-- Accuracy of CW: number of correctly predicted rows divided by 3993
-- (hard-coded divisor - presumably the test-set row count, confirm).
select
  count(1) / 3993
from
  news20mc_cw_submit1
where
  actual = predicted;
0.850488354620586
-- Remove the CW artifacts. news20mc_cw_predict1 is created as a view,
-- so it is removed with DROP VIEW rather than DROP TABLE.
drop table news20mc_cw_model1;
drop view news20mc_cw_predict1;
drop view news20mc_cw_submit1;
#[AROW]
-- Train the multiclass AROW model from news20mc_train_x3 and store one
-- aggregated weight per (label, feature) pair.
-- Version notes are kept on their own comment lines so that a "--" never
-- hides the remainder of the statement.
drop table news20mc_arow_model1;
create table news20mc_arow_model1 as
select
  label,
  cast(feature as int) as feature,
  -- [hivemall v0.1] voted_avg(weight) as weight
  -- [hivemall v0.2 or later]
  argmin_kld(weight, covar) as weight
from (
  select
    -- [hivemall v0.1] train_multiclass_arow(add_bias(features),label) as (label,feature,weight)
    -- [hivemall v0.2 or later]
    train_multiclass_arow(add_bias(features), label) as (label, feature, weight, covar)
  from
    news20mc_train_x3
) t
group by
  label, feature;
-- For every test row, sum weight * value per label against the AROW model,
-- then keep the (score, label) pair returned by maxrow for that row.
create or replace view news20mc_arow_predict1 as
select
  rowid,
  best.col0 as score,
  best.col1 as label
from (
  select
    rowid,
    maxrow(score, label) as best
  from (
    select
      t.rowid,
      m.label,
      sum(m.weight * t.value) as score
    from
      news20mc_test_exploded t
      left outer join news20mc_arow_model1 m
        on (t.feature = m.feature)
    group by
      t.rowid, m.label
  ) label_scores
  group by
    rowid
) top_rows;
-- Pair each test row's label (actual) with the AROW prediction for the same rowid.
create or replace view news20mc_arow_submit1 as
select
  truth.label as actual,
  pred.label as predicted
from
  news20mc_test truth
  join news20mc_arow_predict1 pred
    on (truth.rowid = pred.rowid);
-- Accuracy of AROW: number of correctly predicted rows divided by 3993
-- (hard-coded divisor - presumably the test-set row count, confirm).
select
  count(1) / 3993
from
  news20mc_arow_submit1
where
  actual = predicted;
0.8474830954169797
-- Remove the AROW artifacts. news20mc_arow_predict1 is created as a view,
-- so it is removed with DROP VIEW rather than DROP TABLE.
drop table news20mc_arow_model1;
drop view news20mc_arow_predict1;
drop view news20mc_arow_submit1;
#[SCW1]
-- Train the multiclass SCW1 model from news20mc_train_x3 and store one
-- aggregated weight per (label, feature) pair.
-- Version notes are kept on their own comment lines so that a "--" never
-- hides the remainder of the statement.
drop table news20mc_scw_model1;
create table news20mc_scw_model1 as
select
  label,
  cast(feature as int) as feature,
  -- [hivemall v0.1] voted_avg(weight) as weight
  -- [hivemall v0.2 or later]
  argmin_kld(weight, covar) as weight
from (
  select
    -- [hivemall v0.1] train_multiclass_scw(add_bias(features),label) as (label,feature,weight)
    -- [hivemall v0.2 or later]
    train_multiclass_scw(add_bias(features), label) as (label, feature, weight, covar)
  from
    news20mc_train_x3
) t
group by
  label, feature;
-- For every test row, sum weight * value per label against the SCW1 model,
-- then keep the (score, label) pair returned by maxrow for that row.
create or replace view news20mc_scw_predict1 as
select
  rowid,
  best.col0 as score,
  best.col1 as label
from (
  select
    rowid,
    maxrow(score, label) as best
  from (
    select
      t.rowid,
      m.label,
      sum(m.weight * t.value) as score
    from
      news20mc_test_exploded t
      left outer join news20mc_scw_model1 m
        on (t.feature = m.feature)
    group by
      t.rowid, m.label
  ) label_scores
  group by
    rowid
) top_rows;
-- Pair each test row's label (actual) with the SCW1 prediction for the same rowid.
create or replace view news20mc_scw_submit1 as
select
  truth.label as actual,
  pred.label as predicted
from
  news20mc_test truth
  join news20mc_scw_predict1 pred
    on (truth.rowid = pred.rowid);
-- Accuracy of SCW1: number of correctly predicted rows divided by 3993
-- (hard-coded divisor - presumably the test-set row count, confirm).
select
  count(1) / 3993
from
  news20mc_scw_submit1
where
  actual = predicted;
0.8314550463310794
-- Remove the SCW1 artifacts. news20mc_scw_predict1 is created as a view,
-- so it is removed with DROP VIEW rather than DROP TABLE.
drop table news20mc_scw_model1;
drop view news20mc_scw_predict1;
drop view news20mc_scw_submit1;
#[SCW2]
-- Train the multiclass SCW2 model from news20mc_train_x3 and store one
-- aggregated weight per (label, feature) pair.
-- Version notes are kept on their own comment lines so that a "--" never
-- hides the remainder of the statement.
drop table news20mc_scw2_model1;
create table news20mc_scw2_model1 as
select
  label,
  cast(feature as int) as feature,
  -- [hivemall v0.1] voted_avg(weight) as weight
  -- [hivemall v0.2 or later]
  argmin_kld(weight, covar) as weight
from (
  select
    -- [hivemall v0.1] train_multiclass_scw2(add_bias(features),label) as (label,feature,weight)
    -- [hivemall v0.2 or later]
    train_multiclass_scw2(add_bias(features), label) as (label, feature, weight, covar)
  from
    news20mc_train_x3
) t
group by
  label, feature;
-- For every test row, sum weight * value per label against the SCW2 model,
-- then keep the (score, label) pair returned by maxrow for that row.
create or replace view news20mc_scw2_predict1 as
select
  rowid,
  best.col0 as score,
  best.col1 as label
from (
  select
    rowid,
    maxrow(score, label) as best
  from (
    select
      t.rowid,
      m.label,
      sum(m.weight * t.value) as score
    from
      news20mc_test_exploded t
      left outer join news20mc_scw2_model1 m
        on (t.feature = m.feature)
    group by
      t.rowid, m.label
  ) label_scores
  group by
    rowid
) top_rows;
-- Pair each test row's label (actual) with the SCW2 prediction for the same rowid.
create or replace view news20mc_scw2_submit1 as
select
  truth.label as actual,
  pred.label as predicted
from
  news20mc_test truth
  join news20mc_scw2_predict1 pred
    on (truth.rowid = pred.rowid);
-- Accuracy of SCW2: number of correctly predicted rows divided by 3993
-- (hard-coded divisor - presumably the test-set row count, confirm).
select
  count(1) / 3993
from
  news20mc_scw2_submit1
where
  actual = predicted;
0.8482344102178813
-- Remove the SCW2 artifacts. news20mc_scw2_predict1 is created as a view,
-- so it is removed with DROP VIEW rather than DROP TABLE.
drop table news20mc_scw2_model1;
drop view news20mc_scw2_predict1;
drop view news20mc_scw2_submit1;