-- Session setup: drop any previously added copy of the Hivemall jar, add it again,
-- and source define-all.hive (presumably registers the Hivemall functions used
-- below, such as perceptron, train_pa and voted_avg - confirm against that file).
-- NOTE(review): the paths are absolute and user specific, adjust them for your environment.
delete jar /home/myui/tmp/hivemall.jar; add jar /home/myui/tmp/hivemall.jar; source /home/myui/tmp/define-all.hive;
-- [Perceptron]

-- Train: perceptron() emits (feature, weight) rows from the bias-augmented
-- training features, and voted_avg() folds them into one weight per feature.
drop table if exists news20b_perceptron_model1;
create table news20b_perceptron_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select perceptron(add_bias(features), label) as (feature, weight)
  from news20b_train_x3
) t
group by feature;

-- Predict: per test row, sum weight * value over its exploded features.
-- The outer join keeps features that are absent from the model. Their weight is
-- NULL, so sum() skips them. A row with no matching feature at all gets a NULL
-- total and therefore falls through to label -1.
create or replace view news20b_perceptron_predict1 as
select
  t.rowid,
  sum(m.weight * t.value) as total_weight,
  case when sum(m.weight * t.value) > 0.0 then 1 else -1 end as label
from news20b_test_exploded t
LEFT OUTER JOIN news20b_perceptron_model1 m
  on (t.feature = m.feature)
group by t.rowid;

-- Pair each actual test label with its predicted label by rowid.
create or replace view news20b_perceptron_submit1 as
select
  t.label as actual,
  pd.label as predicted
from news20b_test t
join news20b_perceptron_predict1 pd
  on (t.rowid = pd.rowid);

-- Accuracy: correct predictions divided by 4996.
-- NOTE(review): 4996 is hard coded, presumably the row count of news20b_test - confirm.
select count(1) / 4996
from news20b_perceptron_submit1
where actual = predicted;
-- Recorded result: 0.9459567654123299

-- Teardown: drop the dependent views before the model table they read from.
drop view if exists news20b_perceptron_submit1;
drop view if exists news20b_perceptron_predict1;
drop table if exists news20b_perceptron_model1;
-- [Passive Aggressive]

-- Train: train_pa() emits (feature, weight) rows from the bias-augmented
-- training features, and voted_avg() folds them into one weight per feature.
drop table if exists news20b_pa_model1;
create table news20b_pa_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select train_pa(add_bias(features), label) as (feature, weight)
  from news20b_train_x3
) t
group by feature;

-- Predict: per test row, sum weight * value over its exploded features.
-- The outer join keeps features that are absent from the model. Their weight is
-- NULL, so sum() skips them. A row with no matching feature at all gets a NULL
-- total and therefore falls through to label -1.
create or replace view news20b_pa_predict1 as
select
  t.rowid,
  sum(m.weight * t.value) as total_weight,
  case when sum(m.weight * t.value) > 0.0 then 1 else -1 end as label
from news20b_test_exploded t
LEFT OUTER JOIN news20b_pa_model1 m
  on (t.feature = m.feature)
group by t.rowid;

-- Pair each actual test label with its predicted label by rowid.
create or replace view news20b_pa_submit1 as
select
  t.label as actual,
  pd.label as predicted
from news20b_test t
join news20b_pa_predict1 pd
  on (t.rowid = pd.rowid);

-- Accuracy: correct predictions divided by 4996.
-- NOTE(review): 4996 is hard coded, presumably the row count of news20b_test - confirm.
select count(1) / 4996
from news20b_pa_submit1
where actual = predicted;
-- Recorded result: 0.9603682946357086

-- Teardown: drop the dependent views before the model table they read from.
drop view if exists news20b_pa_submit1;
drop view if exists news20b_pa_predict1;
drop table if exists news20b_pa_model1;
-- [Passive Aggressive (PA1)]

-- Train: train_pa1() emits (feature, weight) rows from the bias-augmented
-- training features, and voted_avg() folds them into one weight per feature.
drop table if exists news20b_pa1_model1;
create table news20b_pa1_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select train_pa1(add_bias(features), label) as (feature, weight)
  from news20b_train_x3
) t
group by feature;

-- Predict: per test row, sum weight * value over its exploded features.
-- The outer join keeps features that are absent from the model. Their weight is
-- NULL, so sum() skips them. A row with no matching feature at all gets a NULL
-- total and therefore falls through to label -1.
create or replace view news20b_pa1_predict1 as
select
  t.rowid,
  sum(m.weight * t.value) as total_weight,
  case when sum(m.weight * t.value) > 0.0 then 1 else -1 end as label
from news20b_test_exploded t
LEFT OUTER JOIN news20b_pa1_model1 m
  on (t.feature = m.feature)
group by t.rowid;

-- Pair each actual test label with its predicted label by rowid.
create or replace view news20b_pa1_submit1 as
select
  t.label as actual,
  pd.label as predicted
from news20b_test t
join news20b_pa1_predict1 pd
  on (t.rowid = pd.rowid);

-- Accuracy: correct predictions divided by 4996.
-- NOTE(review): 4996 is hard coded, presumably the row count of news20b_test - confirm.
select count(1) / 4996
from news20b_pa1_submit1
where actual = predicted;
-- Recorded result: 0.9601681345076061

-- Teardown: drop the dependent views before the model table they read from.
drop view if exists news20b_pa1_submit1;
drop view if exists news20b_pa1_predict1;
drop table if exists news20b_pa1_model1;
-- [Passive Aggressive (PA2)]

-- Train: train_pa2() emits (feature, weight) rows from the bias-augmented
-- training features, and voted_avg() folds them into one weight per feature.
drop table if exists news20b_pa2_model1;
create table news20b_pa2_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select train_pa2(add_bias(features), label) as (feature, weight)
  from news20b_train_x3
) t
group by feature;

-- Predict: per test row, sum weight * value over its exploded features.
-- The outer join keeps features that are absent from the model. Their weight is
-- NULL, so sum() skips them. A row with no matching feature at all gets a NULL
-- total and therefore falls through to label -1.
create or replace view news20b_pa2_predict1 as
select
  t.rowid,
  sum(m.weight * t.value) as total_weight,
  case when sum(m.weight * t.value) > 0.0 then 1 else -1 end as label
from news20b_test_exploded t
LEFT OUTER JOIN news20b_pa2_model1 m
  on (t.feature = m.feature)
group by t.rowid;

-- Pair each actual test label with its predicted label by rowid.
create or replace view news20b_pa2_submit1 as
select
  t.label as actual,
  pd.label as predicted
from news20b_test t
join news20b_pa2_predict1 pd
  on (t.rowid = pd.rowid);

-- Accuracy: correct predictions divided by 4996.
-- NOTE(review): 4996 is hard coded, presumably the row count of news20b_test - confirm.
select count(1) / 4996
from news20b_pa2_submit1
where actual = predicted;
-- Recorded result: 0.9597678142514011

-- Teardown: drop the dependent views before the model table they read from.
drop view if exists news20b_pa2_submit1;
drop view if exists news20b_pa2_predict1;
drop table if exists news20b_pa2_model1;