blob: d49b66f226ddfb426718dcd34f41d4ca123c394b [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*//* ----------------------------------------------------------------------- */
DROP TABLE IF EXISTS minibatch_preprocessing_input;
CREATE TABLE minibatch_preprocessing_input(
sex TEXT,
id SERIAL NOT NULL,
length DOUBLE PRECISION,
diameter DOUBLE PRECISION,
height DOUBLE PRECISION,
whole DOUBLE PRECISION,
shucked DOUBLE PRECISION,
viscera DOUBLE PRECISION,
shell DOUBLE PRECISION,
rings INTEGER);
INSERT INTO minibatch_preprocessing_input(id,sex,length,diameter,height,whole,shucked,viscera,shell,rings) VALUES
(1040,'F',0.66,0.475,0.18,1.3695,0.641,0.294,0.335,6),
(3160,'F',0.34,0.255,0.085,0.204,0.097,0.021,0.05,6),
(3984,'F',0.585,0.45,0.125,0.874,0.3545,0.2075,0.225,6),
(2551,'I',0.28,0.22,0.08,0.1315,0.066,0.024,0.03,5),
(1246,'I',0.385,0.28,0.09,0.228,0.1025,0.042,0.0655,5),
(519,'M',0.325,0.23,0.09,0.147,0.06,0.034,0.045,4),
(2382,'M',0.155,0.115,0.025,0.024,0.009,0.005,0.0075,5),
(698,'M',0.28,0.205,0.1,0.1165,0.0545,0.0285,0.03,5),
(2381,'M',0.175,0.135,0.04,0.0305,0.011,0.0075,0.01,5),
(516,'M',0.27,0.195,0.08,0.1,0.0385,0.0195,0.03,6);
-- no of rows = 10, buffer_size = 4, so assert that count = 10/4 = 3
\set expected_row_count 3
DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary;
SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'length>0.2', 'ARRAY[diameter,height,whole,shucked,viscera,shell]', 4);
SELECT assert
(
row_count = :expected_row_count, 'Row count validation failed for minibatch_preprocessing_out.
Expected:' || :expected_row_count || ' Actual: ' || row_count
) from (select count(*) as row_count from minibatch_preprocessing_out) s;
\set expected_dep_row_count '\'' 2,4,4 '\''
\set expected_dep_col_count '\'' 2,2,2 '\''
\set expected_indep_row_count '\'' 2,4,4 '\''
\set expected_indep_col_count '\'' 6,6,6 '\''
-- assert dimensions for both dependent and independent variable
SELECT assert
(
str_dep_row_count = :expected_dep_row_count, 'Dependent variable row count failed. Actual: ' || str_dep_row_count || ' Expected:' || :expected_dep_row_count
) from
(
select array_to_string(array_agg(row_count order by row_count asc), ',') as str_dep_row_count from (select array_upper(dependent_varname,1) as row_count from minibatch_preprocessing_out order by row_count asc) s
) s;
SELECT assert
(
str_dep_col_count = :expected_dep_col_count, 'Dependent variable col count failed. Actual: ' || str_dep_col_count || ' Expected:' || :expected_dep_col_count
) from
(
select array_to_string(array_agg(col_count order by col_count asc), ',') as str_dep_col_count from (select array_upper(dependent_varname,2) as col_count from minibatch_preprocessing_out order by col_count asc) s
) s;
SELECT assert
(
str_indep_row_count = :expected_indep_row_count, 'Independent variable row count failed. Actual: ' || str_indep_row_count || ' Expected:' || :expected_indep_row_count
) from
(
select array_to_string(array_agg(row_count order by row_count asc), ',') as str_indep_row_count from (select array_upper(independent_varname, 1) as row_count from minibatch_preprocessing_out order by row_count asc) s
) s;
SELECT assert
(
str_indep_col_count = :expected_indep_col_count, 'Independent variable col count failed. Actual: ' || str_indep_col_count || ' Expected:' || :expected_indep_col_count
) from
(
select array_to_string(array_agg(col_count order by col_count asc), ',') as str_indep_col_count from (select array_upper(independent_varname,2) as col_count from minibatch_preprocessing_out order by col_count asc) s
) s;
SELECT assert
(
source_table = 'minibatch_preprocessing_input' AND
output_table = 'minibatch_preprocessing_out' AND
dependent_varname = 'length>0.2' AND
independent_varname = 'ARRAY[diameter,height,whole,shucked,viscera,shell]' AND
buffer_size = 4 AND
class_values = '{f,t}' AND -- we sort the class values in python
num_rows_processed = 10 AND
num_missing_rows_skipped = 0 AND
grouping_cols is NULL,
'Summary Validation failed. Expected:' || __to_char(summary)
) from (select * from minibatch_preprocessing_out_summary) summary;
-- Test null values in x and y
\set expected_row_count 1
DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary;
TRUNCATE TABLE minibatch_preprocessing_input;
INSERT INTO minibatch_preprocessing_input(id,sex,length,diameter,height,whole,shucked,viscera,shell,rings) VALUES
(1040,'F',0.66,0.475,0.18,NULL,0.641,0.294,0.335,6),
(3160,'F',0.34,0.35,0.085,0.204,0.097,0.021,0.05,6),
(3984,NULL,0.585,0.45,0.25,0.874,0.3545,0.2075,0.225,5),
(861,'M',0.595,0.475,NULL,1.1405,0.547,0.231,0.271,6),
(932,NULL,0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195,6),
(698,'F',0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195,6),
(922,NULL,0.445,0.335,0.11,NULL,0.2025,0.1095,0.1195,6);
SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'sex', 'ARRAY[length,diameter,height,whole,shucked,viscera,shell]', 2);
SELECT assert
(
row_count = :expected_row_count, 'Row count validation failed for minibatch_preprocessing_out.
Expected:' || :expected_row_count || ' Actual: ' || row_count
) from (select count(*) as row_count from minibatch_preprocessing_out) s;
SELECT assert
(num_rows_processed = 2 AND num_missing_rows_skipped = 5,
'Rows processed/skipped validation failed for minibatch_preprocessing_out_summary.
Actual num_rows_processed:' || num_rows_processed || ', Actual num_missing_rows_skipped: ' || num_missing_rows_skipped
) from (select * from minibatch_preprocessing_out_summary) s;
-- Test standardization
DROP TABLE IF EXISTS minibatch_preprocessing_input;
DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary;
CREATE TABLE minibatch_preprocessing_input(x1 INTEGER ,x2 INTEGER ,y TEXT);
INSERT INTO minibatch_preprocessing_input(x1,x2,y) VALUES
(2,10,'y1'),
(4,30,'y2');
SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'y', 'ARRAY[x1,x2]', 2);
-- since the order is not deterministic, we assert for all possible orders
\set expected_normalized_independent_var1 '\'' {{-1, -1},{1, 1}} '\''
\set expected_normalized_independent_var2 '\'' {{1, 1},{-1, -1}} '\''
SELECT assert
(
independent_varname = :expected_normalized_independent_var1 OR
independent_varname = :expected_normalized_independent_var2,
'Standardization check failed. Actual: ' || independent_varname
) from
(
select __to_char(independent_varname) as independent_varname from minibatch_preprocessing_out
) s;
-- Test that the standardization table gets created.
\set expected_row_count 1
SELECT assert
(
row_count = :expected_row_count, 'Row count validation failed for minibatch_preprocessing_out_standardization.
Expected:' || :expected_row_count || ' Actual: ' || row_count
) from
(
select count(*) as row_count from minibatch_preprocessing_out_standardization
) s;
-- Test that the summary table gets created.
\set expected_row_count 1
SELECT assert
(
row_count = :expected_row_count, 'Row count validation failed for minibatch_preprocessing_out_summary.
Expected:' || :expected_row_count || ' Actual: ' || row_count
) from
(
select count(*) as row_count from minibatch_preprocessing_out_summary
) s;
-- Test for array values in indep column
DROP TABLE IF EXISTS minibatch_preprocessing_input;
DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary;
CREATE TABLE minibatch_preprocessing_input(
id INTEGER,
sex TEXT,
attributes double precision[],
rings INTEGER);
TRUNCATE TABLE minibatch_preprocessing_input;
INSERT INTO minibatch_preprocessing_input(id,sex,attributes) VALUES
(1040,'F',ARRAY[0.66,0.475,0.18,NULL,0.641,0.294,0.335]),
(3160,'F',ARRAY[0.34,0.35,0.085,0.204,0.097,0.021,0.05]),
(3984,NULL,ARRAY[0.585,0.45,0.25,0.874,0.3545,0.2075,0.225]),
(861,'M',ARRAY[0.595,0.475,NULL,1.1405,0.547,0.231,0.271]),
(932,NULL,ARRAY[0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195]),
(NULL,'F',ARRAY[0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195]),
(922,NULL,ARRAY[0.445,0.335,0.11,NULL,0.2025,0.1095,0.1195]);
SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'sex', 'attributes', 1);
SELECT assert
(
row_count = 2, 'Row count validation failed for minibatch_preprocessing_out.
Expected:' || 2 || ' Actual: ' || row_count
) from (select count(*) as row_count from minibatch_preprocessing_out) s;
-- Test for array values in dep column
DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary;
SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'attributes', 'ARRAY[id]', 1);
SELECT assert
(
row_count = 3, 'Row count validation failed array values in dependent variable.
Expected:' || 3 || ' Actual: ' || row_count
) from (select count(*) as row_count from minibatch_preprocessing_out) s;
-- Test for null buffer size
DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary;
SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'attributes', 'ARRAY[id]');
SELECT assert
(
ind_var_rows = dep_var_rows AND ind_var_rows = buffer_size, 'Row count validation failed for null buffer size.
Buffer size from summary table: ' || buffer_size || ' does not match the output table:'
|| ind_var_rows
) from (select max(array_upper(o.dependent_varname, 1)) as dep_var_rows, max(array_upper(o.independent_varname, 1)) as ind_var_rows , s1.buffer_size from minibatch_preprocessing_out o, minibatch_preprocessing_out_summary s1 group by buffer_size) s;