| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| DROP TABLE IF EXISTS minibatch_preprocessing_input; |
| CREATE TABLE minibatch_preprocessing_input( |
| sex TEXT, |
| id SERIAL NOT NULL, |
| length DOUBLE PRECISION, |
| diameter DOUBLE PRECISION, |
| height DOUBLE PRECISION, |
| whole DOUBLE PRECISION, |
| shucked DOUBLE PRECISION, |
| viscera DOUBLE PRECISION, |
| shell DOUBLE PRECISION, |
| rings INTEGER); |
| |
| INSERT INTO minibatch_preprocessing_input(id,sex,length,diameter,height,whole,shucked,viscera,shell,rings) VALUES |
| (1040,'F',0.66,0.475,0.18,1.3695,0.641,0.294,0.335,6), |
| (3160,'F',0.34,0.255,0.085,0.204,0.097,0.021,0.05,6), |
| (3984,'F',0.585,0.45,0.125,0.874,0.3545,0.2075,0.225,6), |
| (2551,'I',0.28,0.22,0.08,0.1315,0.066,0.024,0.03,5), |
| (1246,'I',0.385,0.28,0.09,0.228,0.1025,0.042,0.0655,5), |
| (519,'M',0.325,0.23,0.09,0.147,0.06,0.034,0.045,4), |
| (2382,'M',0.155,0.115,0.025,0.024,0.009,0.005,0.0075,5), |
| (698,'M',0.28,0.205,0.1,0.1165,0.0545,0.0285,0.03,5), |
| (2381,'M',0.175,0.135,0.04,0.0305,0.011,0.0075,0.01,5), |
| (516,'M',0.27,0.195,0.08,0.1,0.0385,0.0195,0.03,6); |
| |
| -- no of rows = 10, buffer_size = 4, so assert that count = 10/4 = 3 |
| \set expected_row_count 3 |
| DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary; |
| SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'length>0.2', 'ARRAY[diameter,height,whole,shucked,viscera,shell]', 4); |
| SELECT assert |
| ( |
| row_count = :expected_row_count, 'Row count validation failed for minibatch_preprocessing_out. |
| Expected:' || :expected_row_count || ' Actual: ' || row_count |
| ) from (select count(*) as row_count from minibatch_preprocessing_out) s; |
| |
| \set expected_dep_row_count '\'' 2,4,4 '\'' |
| \set expected_dep_col_count '\'' 2,2,2 '\'' |
| \set expected_indep_row_count '\'' 2,4,4 '\'' |
| \set expected_indep_col_count '\'' 6,6,6 '\'' |
| |
| -- assert dimensions for both dependent and independent variable |
| SELECT assert |
| ( |
| str_dep_row_count = :expected_dep_row_count, 'Dependent variable row count failed. Actual: ' || str_dep_row_count || ' Expected:' || :expected_dep_row_count |
| ) from |
| ( |
| select array_to_string(array_agg(row_count order by row_count asc), ',') as str_dep_row_count from (select array_upper(dependent_varname,1) as row_count from minibatch_preprocessing_out order by row_count asc) s |
| ) s; |
| |
| SELECT assert |
| ( |
| str_dep_col_count = :expected_dep_col_count, 'Dependent variable col count failed. Actual: ' || str_dep_col_count || ' Expected:' || :expected_dep_col_count |
| ) from |
| ( |
| select array_to_string(array_agg(col_count order by col_count asc), ',') as str_dep_col_count from (select array_upper(dependent_varname,2) as col_count from minibatch_preprocessing_out order by col_count asc) s |
| ) s; |
| |
| SELECT assert |
| ( |
| str_indep_row_count = :expected_indep_row_count, 'Independent variable row count failed. Actual: ' || str_indep_row_count || ' Expected:' || :expected_indep_row_count |
| ) from |
| ( |
| select array_to_string(array_agg(row_count order by row_count asc), ',') as str_indep_row_count from (select array_upper(independent_varname, 1) as row_count from minibatch_preprocessing_out order by row_count asc) s |
| ) s; |
| |
| SELECT assert |
| ( |
| str_indep_col_count = :expected_indep_col_count, 'Independent variable col count failed. Actual: ' || str_indep_col_count || ' Expected:' || :expected_indep_col_count |
| ) from |
| ( |
| select array_to_string(array_agg(col_count order by col_count asc), ',') as str_indep_col_count from (select array_upper(independent_varname,2) as col_count from minibatch_preprocessing_out order by col_count asc) s |
| ) s; |
| |
| SELECT assert |
| ( |
| source_table = 'minibatch_preprocessing_input' AND |
| output_table = 'minibatch_preprocessing_out' AND |
| dependent_varname = 'length>0.2' AND |
| independent_varname = 'ARRAY[diameter,height,whole,shucked,viscera,shell]' AND |
| buffer_size = 4 AND |
| class_values = '{f,t}' AND -- we sort the class values in python |
| num_rows_processed = 10 AND |
| num_missing_rows_skipped = 0 AND |
| grouping_cols is NULL, |
| 'Summary Validation failed. Expected:' || __to_char(summary) |
| ) from (select * from minibatch_preprocessing_out_summary) summary; |
| |
| |
| -- Test null values in x and y |
| \set expected_row_count 1 |
| DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary; |
| |
| TRUNCATE TABLE minibatch_preprocessing_input; |
| INSERT INTO minibatch_preprocessing_input(id,sex,length,diameter,height,whole,shucked,viscera,shell,rings) VALUES |
| (1040,'F',0.66,0.475,0.18,NULL,0.641,0.294,0.335,6), |
| (3160,'F',0.34,0.35,0.085,0.204,0.097,0.021,0.05,6), |
| (3984,NULL,0.585,0.45,0.25,0.874,0.3545,0.2075,0.225,5), |
| (861,'M',0.595,0.475,NULL,1.1405,0.547,0.231,0.271,6), |
| (932,NULL,0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195,6), |
| (698,'F',0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195,6), |
| (922,NULL,0.445,0.335,0.11,NULL,0.2025,0.1095,0.1195,6); |
| SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'sex', 'ARRAY[length,diameter,height,whole,shucked,viscera,shell]', 2); |
| SELECT assert |
| ( |
| row_count = :expected_row_count, 'Row count validation failed for minibatch_preprocessing_out. |
| Expected:' || :expected_row_count || ' Actual: ' || row_count |
| ) from (select count(*) as row_count from minibatch_preprocessing_out) s; |
| SELECT assert |
| (num_rows_processed = 2 AND num_missing_rows_skipped = 5, |
| 'Rows processed/skipped validation failed for minibatch_preprocessing_out_summary. |
| Actual num_rows_processed:' || num_rows_processed || ', Actual num_missing_rows_skipped: ' || num_missing_rows_skipped |
| ) from (select * from minibatch_preprocessing_out_summary) s; |
| |
| -- Test standardization |
| DROP TABLE IF EXISTS minibatch_preprocessing_input; |
| DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary; |
| CREATE TABLE minibatch_preprocessing_input(x1 INTEGER ,x2 INTEGER ,y TEXT); |
| INSERT INTO minibatch_preprocessing_input(x1,x2,y) VALUES |
| (2,10,'y1'), |
| (4,30,'y2'); |
| SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'y', 'ARRAY[x1,x2]', 2); |
| |
| -- since the order is not deterministic, we assert for all possible orders |
| \set expected_normalized_independent_var1 '\'' {{-1, -1},{1, 1}} '\'' |
| \set expected_normalized_independent_var2 '\'' {{1, 1},{-1, -1}} '\'' |
| |
| SELECT assert |
| ( |
| independent_varname = :expected_normalized_independent_var1 OR |
| independent_varname = :expected_normalized_independent_var2, |
| 'Standardization check failed. Actual: ' || independent_varname |
| ) from |
| ( |
| select __to_char(independent_varname) as independent_varname from minibatch_preprocessing_out |
| ) s; |
| |
| |
| -- Test that the standardization table gets created. |
| \set expected_row_count 1 |
| SELECT assert |
| ( |
| row_count = :expected_row_count, 'Row count validation failed for minibatch_preprocessing_out_standardization. |
| Expected:' || :expected_row_count || ' Actual: ' || row_count |
| ) from |
| ( |
| select count(*) as row_count from minibatch_preprocessing_out_standardization |
| ) s; |
| |
| -- Test that the summary table gets created. |
| \set expected_row_count 1 |
| SELECT assert |
| ( |
| row_count = :expected_row_count, 'Row count validation failed for minibatch_preprocessing_out_summary. |
| Expected:' || :expected_row_count || ' Actual: ' || row_count |
| ) from |
| ( |
| select count(*) as row_count from minibatch_preprocessing_out_summary |
| ) s; |
| |
| -- Test for array values in indep column |
| DROP TABLE IF EXISTS minibatch_preprocessing_input; |
| DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary; |
| CREATE TABLE minibatch_preprocessing_input( |
| id INTEGER, |
| sex TEXT, |
| attributes double precision[], |
| rings INTEGER); |
| TRUNCATE TABLE minibatch_preprocessing_input; |
| INSERT INTO minibatch_preprocessing_input(id,sex,attributes) VALUES |
| (1040,'F',ARRAY[0.66,0.475,0.18,NULL,0.641,0.294,0.335]), |
| (3160,'F',ARRAY[0.34,0.35,0.085,0.204,0.097,0.021,0.05]), |
| (3984,NULL,ARRAY[0.585,0.45,0.25,0.874,0.3545,0.2075,0.225]), |
| (861,'M',ARRAY[0.595,0.475,NULL,1.1405,0.547,0.231,0.271]), |
| (932,NULL,ARRAY[0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195]), |
| (NULL,'F',ARRAY[0.445,0.335,0.11,0.4355,0.2025,0.1095,0.1195]), |
| (922,NULL,ARRAY[0.445,0.335,0.11,NULL,0.2025,0.1095,0.1195]); |
| SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'sex', 'attributes', 1); |
| SELECT assert |
| ( |
| row_count = 2, 'Row count validation failed for minibatch_preprocessing_out. |
| Expected:' || 2 || ' Actual: ' || row_count |
| ) from (select count(*) as row_count from minibatch_preprocessing_out) s; |
| |
| -- Test for array values in dep column |
| DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary; |
| SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'attributes', 'ARRAY[id]', 1); |
| SELECT assert |
| ( |
| row_count = 3, 'Row count validation failed array values in dependent variable. |
| Expected:' || 3 || ' Actual: ' || row_count |
| ) from (select count(*) as row_count from minibatch_preprocessing_out) s; |
| |
| -- Test for null buffer size |
| DROP TABLE IF EXISTS minibatch_preprocessing_out, minibatch_preprocessing_out_standardization, minibatch_preprocessing_out_summary; |
| SELECT minibatch_preprocessor('minibatch_preprocessing_input', 'minibatch_preprocessing_out', 'attributes', 'ARRAY[id]'); |
| SELECT assert |
| ( |
| ind_var_rows = dep_var_rows AND ind_var_rows = buffer_size, 'Row count validation failed for null buffer size. |
| Buffer size from summary table: ' || buffer_size || ' does not match the output table:' |
| || ind_var_rows |
| ) from (select max(array_upper(o.dependent_varname, 1)) as dep_var_rows, max(array_upper(o.independent_varname, 1)) as ind_var_rows , s1.buffer_size from minibatch_preprocessing_out o, minibatch_preprocessing_out_summary s1 group by buffer_size) s; |