blob: f7addc8f60f4ff0fe9f4214dd5008caf1d77c712 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*//* ----------------------------------------------------------------------- */
---------------------------------------------------------------------------
-- Build training dataset:
---------------------------------------------------------------------------
-- Fixture table shared by the encoding tests below; rebuilt on every run.
DROP TABLE IF EXISTS abalone;
CREATE TABLE abalone (
    id       SERIAL,            -- auto-generated row identifier
    sex      VARCHAR,           -- text-valued column
    length   DOUBLE PRECISION,
    diameter DOUBLE PRECISION,
    height   DOUBLE PRECISION,
    "Class"  INTEGER            -- deliberately quoted, mixed-case identifier
);
-- Load 21 rows inline. Fields are separated by '|' and a lone '@' stands for
-- NULL (used for sex in the final row). id is left out of the column list,
-- so it is filled in by the column's default.
COPY abalone (sex, length, diameter, height, "Class") FROM stdin WITH DELIMITER '|' NULL as '@';
M|0.455|0.365|0.095|0
F|0.53|0.42|0.135|0
M|0.35|0.265|0.09|0
F|0.53|0.415|0.15|0
M|0.44|0.365|0.125|0
F|0.545|0.425|0.125|0
I|0.33|0.255|0.08|0
F|0.55|0.44|0.15|0
I|0.425|0.30|0.095|0
F|0.525|0.38|0.140|0
M|0.475|0.37|0.125|0
F|0.535|0.405|0.145|0
M|0.43|0.358|0.11|1
F|0.47|0.355|0.100|1
M|0.49|0.38|0.135|1
F|0.44|0.340|0.100|1
M|0.5|0.400|0.13|1
F|0.565|0.44|0.155|2
I|0.355|0.280|0.085|2
F|0.550|0.415|0.135|2
@|0.475|0.37|0.125|2
\.
-- Dump the freshly loaded fixture.
SELECT * FROM abalone;
-- Default test: encode the single text column sex and leave every optional
-- argument at its default.
-- Remove leftovers from a previous run so the script can be re-executed in
-- the same schema.
DROP TABLE IF EXISTS abalone_out1;
SELECT encode_categorical_variables('abalone', 'abalone_out1', 'sex');
SELECT * FROM abalone_out1;
-- Ignoring numeric columns: length (double precision) is listed next to sex
-- in the columns to encode.
-- Remove leftovers from a previous run so the script can be re-executed in
-- the same schema.
DROP TABLE IF EXISTS abalone_out2;
SELECT encode_categorical_variables('abalone', 'abalone_out2', 'sex, length');
SELECT * FROM abalone_out2;
-- row_id given as several columns; top and value_to_drop used together,
-- with "Class" referred to by unquoted names (class / Class).
-- Remove leftovers from a previous run so the script can be re-executed in
-- the same schema.
DROP TABLE IF EXISTS abalone_out3;
SELECT encode_categorical_variables(
    'abalone', 'abalone_out3',
    'sex, "Class"', 'class',
    'id, sex, "Class"', '2', 'sex=M, Class=1',
    true, 'column', false
);
SELECT * FROM abalone_out3;
-- Wildcard '*' column selection combined with an exclude list ("Class") and
-- a global value_to_drop ('M', not tied to a specific column).
-- Remove leftovers from a previous run so the script can be re-executed in
-- the same schema.
DROP TABLE IF EXISTS abalone_out4;
SELECT encode_categorical_variables(
    'abalone', 'abalone_out4',
    '*', '"Class"',
    'id', '2', 'M',
    true, 'column', false
);
SELECT * FROM abalone_out4;
-- Array output together with its dictionary table, top given as a fraction
-- ('0.5'), and a global value_to_drop ('M'). The column list also contains
-- the expression "Class" > 1.
-- Remove leftovers from a previous run (output and dictionary) so the script
-- can be re-executed in the same schema.
DROP TABLE IF EXISTS abalone_out5, abalone_out5_dictionary;
SELECT encode_categorical_variables(
    'abalone', 'abalone_out5',
    'sex, "Class" > 1', '',
    'id', '0.5', 'M',
    true, 'array', false
);
SELECT * FROM abalone_out5;
SELECT * FROM abalone_out5_dictionary ORDER BY index;
-- Dictionary output with svec encoding; top ('3') asks for more values than
-- the columns can supply.
-- Remove leftovers from a previous run (output and dictionary) so the script
-- can be re-executed in the same schema.
DROP TABLE IF EXISTS abalone_out6, abalone_out6_dictionary;
SELECT encode_categorical_variables(
    'abalone', 'abalone_out6',
    'sex, "Class"', '',
    'id', '3', 'class=1',
    true, 'svec', true
);
SELECT * FROM abalone_out6;
SELECT * FROM abalone_out6_dictionary ORDER BY variable, index;
-- Test special characters and unicode
-- Fixture whose quoted column names contain dollar signs, single quotes,
-- punctuation and a Cyrillic letter, and whose text values contain quotes,
-- brackets, commas and non-ASCII characters.
DROP TABLE IF EXISTS abalone_special_char;
CREATE TABLE abalone_special_char (
id serial,
"se$$''x" character varying,
"len$$'%*()gth" double precision,
diameter double precision,
height double precision,
"ClaЖss" bigint
);
-- Load 7 rows inline: '|' separates fields and a lone '@' would mean NULL.
-- The '@' inside M@[}(:*;M is part of a longer value, so that field is
-- ordinary text rather than NULL.
COPY abalone_special_char ("se$$''x", "len$$'%*()gth", diameter, height, "ClaЖss") FROM stdin WITH DELIMITER '|' NULL as '@';
F"F|0.475|0.37|0.125|2
F'F|0.55|0.44|0.15|0
F$$,'}][{F|0.565|0.44|0.155|2
MЖM|0.44|0.365|0.125|0
M@[}(:*;M|0.475|0.37|0.125|2
M,M|0.47|0.355|0.100|1
'F'F'|0.55|0.44|0.15|0
\.
-- Wildcard column selection on the special-character fixture.
-- Remove leftovers from a previous run so the script can be re-executed in
-- the same schema.
DROP TABLE IF EXISTS abalone_special_char_out0;
SELECT encode_categorical_variables('abalone_special_char', 'abalone_special_char_out0', '*');
SELECT * FROM abalone_special_char_out0;
-- Explicitly named columns whose identifiers contain $$ and single quotes;
-- each single quote is doubled inside the SQL string literal.
-- Remove leftovers from a previous run so the script can be re-executed in
-- the same schema.
DROP TABLE IF EXISTS abalone_special_char_out1;
SELECT encode_categorical_variables('abalone_special_char', 'abalone_special_char_out1', '"se$$''''x", "len$$''%*()gth"');
SELECT * FROM abalone_special_char_out1;
-- svec output with a dictionary on the special-character fixture;
-- value_to_drop names the non-ASCII column without quotes (claЖss=1).
-- Remove leftovers from a previous run (output and dictionary) so the script
-- can be re-executed in the same schema.
DROP TABLE IF EXISTS abalone_special_char_out2, abalone_special_char_out2_dictionary;
SELECT encode_categorical_variables(
    'abalone_special_char',
    'abalone_special_char_out2',
    '"se$$''''x", "ClaЖss"', '',
    'id', '3', 'claЖss=1',
    true, 'svec', true
);
SELECT * FROM abalone_special_char_out2;
SELECT * FROM abalone_special_char_out2_dictionary ORDER BY variable, index;