| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| --------------------------------------------------------------------------- |
| -- Build training dataset: |
| --------------------------------------------------------------------------- |
| -- Rebuild the fixture table from scratch so the script can be re-run. |
| DROP TABLE IF EXISTS abalone; |
| |
| CREATE TABLE abalone ( |
|     id        SERIAL,             -- not supplied by the COPY that follows; takes its default |
|     sex       CHARACTER VARYING, |
|     length    DOUBLE PRECISION, |
|     diameter  DOUBLE PRECISION, |
|     height    DOUBLE PRECISION, |
|     "Class"   INTEGER             -- double-quoted to keep the capital C |
| ); |
| -- Load the fixture rows inline from stdin. Fields are separated by '|' and a |
| -- field that is exactly '@' is read as NULL, so the last data row has a NULL sex. |
| -- id is absent from the column list and therefore takes its serial default. |
| -- The data section ends at the '\.' line; nothing may be inserted between the |
| -- COPY statement and that terminator. |
| COPY abalone (sex, length, diameter, height, "Class") FROM stdin WITH DELIMITER '|' NULL as '@'; |
| M|0.455|0.365|0.095|0 |
| F|0.53|0.42|0.135|0 |
| M|0.35|0.265|0.09|0 |
| F|0.53|0.415|0.15|0 |
| M|0.44|0.365|0.125|0 |
| F|0.545|0.425|0.125|0 |
| I|0.33|0.255|0.08|0 |
| F|0.55|0.44|0.15|0 |
| I|0.425|0.30|0.095|0 |
| F|0.525|0.38|0.140|0 |
| M|0.475|0.37|0.125|0 |
| F|0.535|0.405|0.145|0 |
| M|0.43|0.358|0.11|1 |
| F|0.47|0.355|0.100|1 |
| M|0.49|0.38|0.135|1 |
| F|0.44|0.340|0.100|1 |
| M|0.5|0.400|0.13|1 |
| F|0.565|0.44|0.155|2 |
| I|0.355|0.280|0.085|2 |
| F|0.550|0.415|0.135|2 |
| @|0.475|0.37|0.125|2 |
| \. |
| |
| -- Echo the loaded fixture. Ordered by the serial key so the listing is the |
| -- same on every run; without ORDER BY the row order is unspecified. |
| SELECT * FROM abalone ORDER BY id; |
| |
| -- Default test: three-argument form (source table, output table, column list). |
| SELECT encode_categorical_variables( |
|     'abalone', |
|     'abalone_out1', |
|     'sex' |
| ); |
| SELECT * FROM abalone_out1; |
| |
| -- Numeric columns are ignored: length (double precision) is listed next to sex. |
| SELECT encode_categorical_variables( |
|     'abalone', |
|     'abalone_out2', |
|     'sex, length' |
| ); |
| SELECT * FROM abalone_out2; |
| |
| -- row_id may list several columns; top and value_to_drop can be used together, |
| -- and value_to_drop accepts an unquoted column name (Class instead of "Class"). |
| SELECT encode_categorical_variables( |
|     'abalone', 'abalone_out3', |
|     'sex, "Class"', 'class', |
|     'id, sex, "Class"', '2', 'sex=M, Class=1', |
|     TRUE, 'column', FALSE |
| ); |
| -- id is one of the row_id columns carried into the output table, so it is used |
| -- here to give the listing a stable order. |
| SELECT * FROM abalone_out3 ORDER BY id; |
| |
| -- '*' column selection, an exclude list ("Class"), and a global value_to_drop |
| -- ('M' given without a column name). |
| SELECT encode_categorical_variables( |
|     'abalone', 'abalone_out4', |
|     '*', '"Class"', |
|     'id', '2', 'M', |
|     TRUE, 'column', FALSE |
| ); |
| -- id is passed as the row_id column, so order by it for a stable listing. |
| SELECT * FROM abalone_out4 ORDER BY id; |
| |
| -- Array output together with its dictionary table, top given as a fraction |
| -- ('0.5'), and a global value_to_drop ('M'). The categorical list also |
| -- contains an expression ("Class" > 1). |
| -- NOTE(review): the last argument is FALSE here, yet abalone_out5_dictionary is |
| -- queried below; presumably array output always writes a dictionary. Confirm. |
| SELECT encode_categorical_variables( |
|     'abalone', 'abalone_out5', |
|     'sex, "Class" > 1', '', |
|     'id', '0.5', 'M', |
|     TRUE, 'array', FALSE |
| ); |
| -- id is passed as the row_id column, so order by it for a stable listing. |
| SELECT * FROM abalone_out5 ORDER BY id; |
| SELECT * FROM abalone_out5_dictionary ORDER BY index; |
| |
| -- Dictionary output requested explicitly (last argument TRUE) with 'svec' |
| -- output, and top ('3') set to at least as many values as the columns hold. |
| SELECT encode_categorical_variables( |
|     'abalone', 'abalone_out6', |
|     'sex, "Class"', '', |
|     'id', '3', 'class=1', |
|     TRUE, 'svec', TRUE |
| ); |
| -- id is passed as the row_id column, so order by it for a stable listing. |
| SELECT * FROM abalone_out6 ORDER BY id; |
| SELECT * FROM abalone_out6_dictionary ORDER BY variable, index; |
| |
| -- Test special characters and Unicode |
| -- Fixture whose column names deliberately contain dollar signs, single quotes, |
| -- '%', '*', parentheses and a Cyrillic letter. |
| DROP TABLE IF EXISTS abalone_special_char; |
| |
| CREATE TABLE abalone_special_char ( |
|     id               SERIAL, |
|     "se$$''x"        CHARACTER VARYING, |
|     "len$$'%*()gth"  DOUBLE PRECISION, |
|     diameter         DOUBLE PRECISION, |
|     height           DOUBLE PRECISION, |
|     "ClaЖss"         BIGINT |
| ); |
| -- Load rows whose first field contains awkward characters: double and single |
| -- quotes, '$$', brackets and braces, a Cyrillic letter, a comma, and an '@' |
| -- embedded inside a longer value. Fields are '|'-separated and '@' is the NULL |
| -- marker; no field here is exactly '@'. |
| -- NOTE(review): the embedded '@' is expected to load as ordinary text, since the |
| -- NULL marker should only match a whole field; confirm against COPY docs. |
| COPY abalone_special_char ("se$$''x", "len$$'%*()gth", diameter, height, "ClaЖss") FROM stdin WITH DELIMITER '|' NULL as '@'; |
| F"F|0.475|0.37|0.125|2 |
| F'F|0.55|0.44|0.15|0 |
| F$$,'}][{F|0.565|0.44|0.155|2 |
| MЖM|0.44|0.365|0.125|0 |
| M@[}(:*;M|0.475|0.37|0.125|2 |
| M,M|0.47|0.355|0.100|1 |
| 'F'F'|0.55|0.44|0.15|0 |
| \. |
| |
| -- '*' column selection against the special-character fixture. |
| SELECT encode_categorical_variables( |
|     'abalone_special_char', |
|     'abalone_special_char_out0', |
|     '*' |
| ); |
| SELECT * FROM abalone_special_char_out0; |
| |
| -- Quoted special-character column names passed explicitly; single quotes inside |
| -- the names are doubled again because the list is itself a string literal. |
| SELECT encode_categorical_variables( |
|     'abalone_special_char', |
|     'abalone_special_char_out1', |
|     '"se$$''''x", "len$$''%*()gth"' |
| ); |
| SELECT * FROM abalone_special_char_out1; |
| |
| -- Special-character and Unicode column names with 'svec' output and the |
| -- dictionary flag set to TRUE; the value-to-drop entry names the Unicode column |
| -- unquoted and in lower case (claЖss=1). |
| SELECT encode_categorical_variables( |
|     'abalone_special_char', |
|     'abalone_special_char_out2', |
|     '"se$$''''x", "ClaЖss"', '', |
|     'id', '3', 'claЖss=1', |
|     TRUE, 'svec', TRUE |
| ); |
| -- id is passed as the row_id column, so order by it for a stable listing. |
| SELECT * FROM abalone_special_char_out2 ORDER BY id; |
| SELECT * FROM abalone_special_char_out2_dictionary ORDER BY variable, index; |