blob: 6a72975471b0b3bcfa3a32affad590100af3f1f7 [file] [log] [blame]
/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
-- model_selection_sp: runs the model-selection pipeline over a sample of a table.
--
-- Steps (each step reports its result with RAISE NOTICE):
--   1. profiling_filtering_phase  -> reads the 'time' key of the returned JSON (score_time)
--   2. profiling_refinement_phase -> reads the 'time' key of the returned JSON (train_time)
--   3. coordinator                -> reads the 'k', 'u' and 'n' keys of the returned JSON
--   4. filtering_phase            -> called with n and k from step 3
--
-- Steps 1, 2 and 4 each draw their own random sample of at most batch_size rows
-- (ORDER BY RANDOM() is re-evaluated on every EXECUTE) and pass it to the UDF as
-- the text form of a JSON array of row objects.
--
-- Column names are identifier-quoted with quote_ident(), the same treatment the
-- table name gets from %I. They are therefore interpreted as exact column names:
-- a name containing upper-case or special characters is matched case-sensitively.
--
-- Raises an exception when selected_columns yields no column names, when
-- batch_size is NULL, or when the coordinator result lacks 'n' or 'k'.
CREATE OR REPLACE
PROCEDURE model_selection_sp(
    dataset TEXT,             -- dataset (table) name
    selected_columns TEXT[],  -- columns of the dataset to use
    budget TEXT,              -- user given time budget, forwarded to coordinator
    batch_size INTEGER,       -- maximum number of rows sampled per batch
    config_file TEXT          -- config file path, forwarded to every UDF
)
LANGUAGE plpgsql
AS $$
DECLARE
    -- global inputs/outputs
    result_status TEXT;       -- raw text returned by the most recent UDF call
    column_list   TEXT;       -- quoted, comma-separated column names
    batch_cte     TEXT;       -- shared "WITH batch_rows AS (...)" prefix
    -- UDF outputs
    score_time    TEXT;
    train_time    TEXT;
    coordinator_k INTEGER;
    coordinator_u INTEGER;
    coordinator_n INTEGER;
BEGIN
    -- Combine the columns into a quoted identifier list, keeping array order.
    -- NULL elements are skipped by string_agg, as array_to_string would skip them.
    SELECT string_agg(quote_ident(col.name), ', ' ORDER BY col.ord)
    INTO column_list
    FROM unnest(selected_columns) WITH ORDINALITY AS col(name, ord);

    -- An empty list would otherwise produce the valid-but-useless "SELECT FROM tbl".
    IF column_list IS NULL OR column_list = '' THEN
        RAISE EXCEPTION 'model_selection_sp: selected_columns must contain at least one column name';
    END IF;

    -- A NULL batch size would otherwise surface as a syntax error in the dynamic SQL.
    IF batch_size IS NULL THEN
        RAISE EXCEPTION 'model_selection_sp: batch_size must not be NULL';
    END IF;

    -- Sampling prefix shared by steps 1, 2 and 4. It is concatenated (never passed
    -- through format() again) so that a '%' inside an identifier stays literal.
    batch_cte := format('
        WITH batch_rows AS (
            SELECT %s
            FROM %I
            ORDER BY RANDOM()
            LIMIT %s
        )',
        column_list, dataset, batch_size);

    -- 1. Profiling time to score a model with TFMEM
    EXECUTE batch_cte || format('
        SELECT profiling_filtering_phase(
            json_agg(row_to_json(t))::text, %L
        )
        FROM batch_rows AS t',
        config_file)
    INTO result_status;
    score_time := json_extract_path_text(result_status::json, 'time');
    RAISE NOTICE '1. profiling_filtering_phase, get score_time: %', score_time;

    -- 2. Profiling time of training a model for one epoch
    EXECUTE batch_cte || format('
        SELECT profiling_refinement_phase(
            json_agg(row_to_json(t))::text, %L
        )
        FROM batch_rows AS t',
        config_file)
    INTO result_status;
    train_time := json_extract_path_text(result_status::json, 'time');
    RAISE NOTICE '2. profiling_refinement_phase, get train_time: %', train_time;

    -- 3. Coordinator to get N, K, U
    -- Arguments are passed as quoted literals (%L) so the function's own
    -- parameter types decide how they are coerced.
    EXECUTE format(
        'SELECT "coordinator"(%L, %L, %L, false, %L)',
        score_time, train_time, budget, config_file)
    INTO result_status;
    coordinator_k := (json_extract_path_text(result_status::json, 'k'))::integer;
    coordinator_u := (json_extract_path_text(result_status::json, 'u'))::integer;
    coordinator_n := (json_extract_path_text(result_status::json, 'n'))::integer;
    RAISE NOTICE '3. coordinator result: k = %, u = %, n = %', coordinator_k, coordinator_u, coordinator_n;

    -- n and k are spliced into the next statement with %s; a NULL would leave an
    -- empty argument and fail with an unhelpful syntax error, so fail clearly here.
    IF coordinator_n IS NULL OR coordinator_k IS NULL THEN
        RAISE EXCEPTION 'model_selection_sp: coordinator result is missing n or k: %', result_status;
    END IF;

    -- 4. Run filtering phase to get top K models.
    EXECUTE batch_cte || format('
        SELECT filtering_phase(
            json_agg(row_to_json(t))::text, %s, %s, %L
        )
        FROM batch_rows AS t',
        coordinator_n, coordinator_k, config_file)
    INTO result_status;
    RAISE NOTICE '4. run filtering phase, k models = %', result_status;
END; $$;