blob: 3d9d0d93148081e645701500db3b0719119527b3 [file] [log] [blame]
/* ---------------------------------------------------------------------*//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*//* ---------------------------------------------------------------------*/
m4_include(`SQLCommon.m4')
\i m4_regexp(MODULE_PATHNAME,
`\(.*\)libmadlib\.so',
`\1../../modules/deep_learning/test/madlib_keras_cifar.setup.sql_in'
)
-- Please do not break up the compile_params string
-- It might break the assertion
DROP TABLE IF EXISTS keras_saved_out, keras_saved_out_summary;
SELECT madlib_keras_fit(
'cifar_10_sample_batched',
'keras_saved_out',
'model_arch',
1,
$$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
$$ batch_size=2, epochs=1, verbose=0 $$::text,
3);
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample',
'id',
'x',
'cifar10_predict',
NULL,
FALSE);
-- Validate that prediction output table exists and has correct schema
SELECT assert(UPPER(pg_typeof(id)::TEXT) = 'INTEGER', 'id column should be INTEGER type')
FROM cifar10_predict;
SELECT assert(UPPER(pg_typeof(y)::TEXT) =
'SMALLINT', 'prediction column should be SMALLINT type')
FROM cifar10_predict;
-- Validate correct number of rows returned.
SELECT assert(COUNT(*)=4, 'Output table of madlib_keras_predict should have two rows')
FROM cifar10_predict;
-- First test that all values are in set of class values; if this breaks, it's definitely a problem.
SELECT assert(y IN (0,1),
'Predicted value not in set of defined class values for model')
FROM cifar10_predict;
DROP TABLE IF EXISTS cifar10_predict;
SELECT assert(trap_error($TRAP$SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample_batched',
'buffer_id',
'independent_var',
'cifar10_predict',
NULL,
FALSE);$TRAP$) = 1,
'Passing batched image table to predict should error out.');
-- Test with pred_type=0.2
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample',
'id',
'x',
'cifar10_predict',
0.2,
FALSE);
SELECT assert(UPPER(pg_typeof(prob)::TEXT) =
'DOUBLE PRECISION', 'column prob should be double precision type')
FROM cifar10_predict;
SELECT assert(COUNT(*)=4, 'Predict out table must have exactly three cols.')
FROM pg_attribute
WHERE attrelid='cifar10_predict'::regclass AND attnum>0;
-- Test with pred_type=2
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample',
'id',
'x',
'cifar10_predict',
2,
FALSE);
SELECT assert(UPPER(pg_typeof(prob)::TEXT) =
'DOUBLE PRECISION', 'column prob should be double precision type')
FROM cifar10_predict;
SELECT assert(COUNT(*)=4, 'Predict out table must have exactly three cols.')
FROM pg_attribute
WHERE attrelid='cifar10_predict'::regclass AND attnum>0;
-- Tests with text class values:
-- Create a new table using the text based column for dep var.
DROP TABLE IF EXISTS cifar_10_sample_text_batched;
m4_changequote(`<!', `!>')
CREATE TABLE cifar_10_sample_text_batched AS
SELECT buffer_id, independent_var, dependent_var,
independent_var_shape, dependent_var_shape
m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!, __dist_key__ !>)
FROM cifar_10_sample_batched m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!DISTRIBUTED BY (__dist_key__)!>);
-- Insert a new row with NULL as the dependent var (one-hot encoded)
UPDATE cifar_10_sample_text_batched
SET dependent_var = convert_array_to_bytea(ARRAY[0,0,1,0,0]::smallint[]) WHERE buffer_id=0;
UPDATE cifar_10_sample_text_batched
SET dependent_var = convert_array_to_bytea(ARRAY[0,1,0,0,0]::smallint[]) WHERE buffer_id=1;
INSERT INTO cifar_10_sample_text_batched(m4_ifdef(<!__POSTGRESQL__!>, <!!>, <! __dist_key__, !>) buffer_id, independent_var, dependent_var, independent_var_shape, dependent_var_shape)
SELECT m4_ifdef(<!__POSTGRESQL__!>, <!!>, <! __dist_key__, !>) 2 AS buffer_id, independent_var,
convert_array_to_bytea(ARRAY[0,1,0,0,0]::smallint[]) AS dependent_var,
independent_var_shape, dependent_var_shape
FROM cifar_10_sample_batched WHERE cifar_10_sample_batched.buffer_id=0;
UPDATE cifar_10_sample_text_batched SET dependent_var_shape = ARRAY[1,5];
m4_changequote(<!`!>,<!'!>)
-- Create the necessary summary table for the batched input.
DROP TABLE IF EXISTS cifar_10_sample_text_batched_summary;
CREATE TABLE cifar_10_sample_text_batched_summary(
source_table text,
output_table text,
dependent_varname text,
independent_varname text,
dependent_vartype text,
class_values text[],
buffer_size integer,
normalizing_const numeric);
INSERT INTO cifar_10_sample_text_batched_summary values (
'cifar_10_sample',
'cifar_10_sample_text_batched',
'y_text',
'x',
'text',
ARRAY[NULL,'cat','dog','bird','fish'],
1,
255.0);
DROP TABLE IF EXISTS keras_saved_out, keras_saved_out_summary;
SELECT madlib_keras_fit(
'cifar_10_sample_text_batched',
'keras_saved_out',
'model_arch',
2,
$$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
$$ batch_size=2, epochs=1, verbose=0 $$::text,
3);
-- Predict with pred_type=prob
DROP TABLE IF EXISTS cifar_10_sample_text;
CREATE TABLE cifar_10_sample_text AS
SELECT id, x, y_text
FROM cifar_10_sample;
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample_text',
'id',
'x',
'cifar10_predict',
0.2,
FALSE);
-- Validate the output datatype of newly created prediction columns
-- for prediction type = 'prob' and class_values 'TEXT' with NULL as a valid
-- class_values
SELECT assert(UPPER(pg_typeof(prob)::TEXT) =
'DOUBLE PRECISION', 'column prob should be double precision type')
FROM cifar10_predict;
SELECT assert(COUNT(*)=4, 'Predict out table must have exactly four cols.')
FROM pg_attribute
WHERE attrelid='cifar10_predict'::regclass AND attnum>0;
-- Predict with pred_type=response
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample_text',
'id',
'x',
'cifar10_predict',
'response',
FALSE);
-- Validate the output datatype of newly created prediction columns
-- for prediction type = 'response' and class_values 'TEXT' with NULL
-- as a valid class_values
SELECT assert(UPPER(pg_typeof(y_text)::TEXT) =
'TEXT', 'prediction column should be TEXT type')
FROM cifar10_predict LIMIT 1;
-- Tests where the assumption is user has one-hot encoded, so class_values
-- in input summary table will be NULL.
UPDATE keras_saved_out_summary SET class_values=NULL;
-- Predict with pred_type=all
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample_text',
'id',
'x',
'cifar10_predict',
'prob',
FALSE);
SELECT assert(count(0) = 5, 'y should get 5 values because dependent_var = [0,0,1,0,0]')
FROM cifar10_predict WHERE id = 0;
-- Predict with pred_type=response
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample_text',
'id',
'x',
'cifar10_predict',
'response',
FALSE);
-- Validate the output datatype of newly created prediction column
-- for prediction type = 'response' and class_value = NULL
-- Returns: Index of class value in user's one-hot encoded data with
-- highest probability
SELECT assert(UPPER(pg_typeof(y_text)::TEXT) =
'TEXT', 'column y_text should be text type')
FROM cifar10_predict LIMIT 1;
-- Test predict with INTEGER class_values
-- with NULL as a valid class value
-- Update output_summary table to reflect
-- class_values {NULL,0,1,4,5} and dependent_vartype is SMALLINT
UPDATE keras_saved_out_summary
SET dependent_varname = 'y',
class_values = ARRAY[NULL,0,1,4,5]::INTEGER[],
dependent_vartype = 'smallint';
-- Predict with pred_type=prob
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample',
'id',
'x',
'cifar10_predict',
'prob',
FALSE);
SELECT assert(count(*)=5, 'Predict out table must have 5 different y values')
FROM cifar10_predict WHERE id = 0;
-- Predict with pred_type=response
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample',
'id',
'x',
'cifar10_predict',
'response',
FALSE);
-- Validate the output datatype of newly created prediction column
-- for prediction type = 'response' and class_values 'TEXT' with NULL
-- as a valid class_values
-- Returns: class_value with highest probability
SELECT assert(count(*)=1, 'Predict out table must have a single response')
FROM cifar10_predict WHERE id = 0;
-- Predict with correctly shaped data, must go thru.
-- Update output_summary table to reflect
-- class_values, num_classes and model_id for shaped data
DROP TABLE IF EXISTS keras_saved_out, keras_saved_out_summary;
SELECT madlib_keras_fit(
'cifar_10_sample_test_shape_batched',
'keras_saved_out',
'model_arch',
3,
$$ optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']$$::text,
$$ batch_size=2, epochs=1, verbose=0 $$::text,
3);
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample_test_shape',
'id',
'x',
'cifar10_predict',
'prob',
FALSE);
-- Prediction with incorrectly shaped data must error out.
DROP TABLE IF EXISTS cifar10_predict;
SELECT assert(trap_error($TRAP$SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample',
'id',
'x',
'cifar10_predict',
'prob',
FALSE);$TRAP$) = 1,
'Input shape is (32, 32, 3) but model was trained with (3, 32, 32). Should have failed.');
-- Test model_arch is retrieved from model data table and not model architecture
DROP TABLE IF EXISTS model_arch;
DROP TABLE IF EXISTS cifar10_predict;
SELECT madlib_keras_predict(
'keras_saved_out',
'cifar_10_sample_test_shape',
'id',
'x',
'cifar10_predict',
'prob',
FALSE);
-- Test multi model
\i m4_regexp(MODULE_PATHNAME,
`\(.*\)libmadlib\.so',
`\1../../modules/deep_learning/test/madlib_keras_iris.setup.sql_in'
)
m4_changequote(`<!', `!>')
m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!
DROP TABLE IF EXISTS mst_table, mst_table_summary;
SELECT load_model_selection_table(
'iris_model_arch',
'mst_table',
ARRAY[1],
ARRAY[
$$loss='categorical_crossentropy', optimizer='Adam(lr=0.01)', metrics=['accuracy']$$,
$$loss='categorical_crossentropy', optimizer='Adam(lr=0.001)', metrics=['accuracy']$$,
$$loss='categorical_crossentropy', optimizer='Adam(lr=0.0001)', metrics=['accuracy']$$
],
ARRAY[
$$batch_size=50, epochs=1$$
]
);
DROP TABLE IF EXISTS iris_multiple_model, iris_multiple_model_summary, iris_multiple_model_info;
SELECT setseed(0);
SELECT madlib_keras_fit_multiple_model(
'iris_data_packed',
'iris_multiple_model',
'mst_table',
6,
FALSE
);
DROP TABLE IF EXISTS iris_predict;
SELECT madlib_keras_predict(
'iris_multiple_model', -- model
'iris_train', -- test_table
'id', -- id column
'attributes', -- independent var
'iris_predict', -- output table
'response', -- prediction type
NULL, -- use gpus
2 -- mst_key to use
);
SELECT assert(relative_error(test_accuracy, training_metrics_final) < 0.1,
'Predict output validation failed.')
FROM iris_multiple_model_info i,
(SELECT count(*)/(150*0.8) AS test_accuracy FROM
(SELECT iris_train.class_text AS actual, iris_predict.class_text AS estimated
FROM iris_predict INNER JOIN iris_train
ON iris_train.id=iris_predict.id)q
WHERE q.actual=q.estimated) q2
WHERE i.mst_key = 2;
!>)