#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import calendar
import os
import time
import requests
import json
from typing import List, Dict
import traceback
import orjson
from argparse import Namespace
from shared_config import parse_config_arguments


def exception_catcher(func):
    """Decorator: each UDF below accepts a JSON-encoded parameter string;
    parse it, load the config, set logging env vars, and serialize any
    exception into the returned JSON instead of raising."""
    def wrapper(encoded_str: str):
        try:
            # each function accepts a json string
params = json.loads(encoded_str)
config_file = params.get("config_file")
# Parse the config file
args = parse_config_arguments(config_file)
# Set the environment variables
ts = calendar.timegm(time.gmtime())
os.environ.setdefault("base_dir", args.base_dir)
os.environ.setdefault("log_logger_folder_name", args.log_folder)
os.environ.setdefault("log_file_name", args.log_name + "_" + str(ts) + ".log")
# Call the original function with the parsed parameters
return func(params, args)
except Exception as e:
return orjson.dumps(
{"Errored": traceback.format_exc()}).decode('utf-8')
return wrapper
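
# Illustrative contract (a sketch; the exact keys each UDF expects are defined
# by the functions below):
#   encoded = json.dumps({"config_file": "/path/to/config.ini", "budget": "10"})
#   result_json = coordinator(encoded)  # JSON string, or {"Errored": <traceback>} on failure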

from torch.utils.data import Dataset
import torch


class LibsvmDataset(Dataset):
    """ Dataset loader for the LibSVM data format """
    @staticmethod
    def decode_libsvm(columns):
        # every column except the last is an "id:value" pair; the last is the label
        map_func = lambda pair: (int(pair[0]), float(pair[1]))
        ids, values = zip(*map(lambda col: map_func(col.split(':')), columns[:-1]))
        sample = {'id': torch.LongTensor(ids),
                  'value': torch.FloatTensor(values),
                  'y': float(columns[-1])}
        return sample
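
    # Example (illustrative): decode_libsvm(["1:0.5", "3:1.2", "1"]) yields
    # {'id': LongTensor([1, 3]), 'value': FloatTensor([0.5, 1.2]), 'y': 1.0}.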

    @staticmethod
    def pre_processing(mini_batch_data: List[Dict]):
sample_lines = len(mini_batch_data)
nfields = len(mini_batch_data[0].keys()) - 1
feat_id = torch.LongTensor(sample_lines, nfields)
feat_value = torch.FloatTensor(sample_lines, nfields)
y = torch.FloatTensor(sample_lines)
for i in range(sample_lines):
row_value = mini_batch_data[i].values()
sample = LibsvmDataset.decode_libsvm(list(row_value))
feat_id[i] = sample['id']
feat_value[i] = sample['value']
y[i] = sample['y']
return feat_id, feat_value, y, sample_lines

    def __init__(self, mini_batch_data: List[Dict]):
self.feat_id, self.feat_value, self.y, self.nsamples = \
LibsvmDataset.pre_processing(mini_batch_data)

    def __len__(self):
return self.nsamples

    def __getitem__(self, idx):
return {'id': self.feat_id[idx],
'value': self.feat_value[idx],
'y': self.y[idx]}
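

# A minimal usage sketch (rows follow the "id:value" format used by the
# __main__ demo at the bottom of this file):
#   rows = [{"col1": "1:0.5", "col2": "3:1.2", "label": "1"}]
#   ds = LibsvmDataset(rows)
#   ds[0]  # -> {'id': tensor([1, 3]), 'value': tensor([0.5000, 1.2000]), 'y': tensor(1.)}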


def generate_dataloader(mini_batch_data, args):
    from src.logger import logger
    from torch.utils.data import DataLoader
    logger.info("Begin to preprocess the dataset")
begin_time = time.time()
dataloader = DataLoader(LibsvmDataset(mini_batch_data),
batch_size=args.batch_size,
shuffle=True)
logger.info(f"Preprocessing dataset Done ! time_usage = {time.time() - begin_time}")
return dataloader


@exception_catcher
def model_selection(params: dict, args: Namespace):
    from src.logger import logger
    logger.info("begin run model_selection on UDF runtime with CPU only")
mini_batch_data = json.loads(params["mini_batch"])
budget = float(params["budget"])
from src.eva_engine.run_ms import RunModelSelection
dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
    # RunModelSelection expects three loaders; reuse the same mini-batch loader for all of them
    data_loader = [dataloader, dataloader, dataloader]
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
best_arch, best_arch_performance, time_usage, _, p1_trace_highest_score, p1_trace_highest_scored_models_id = \
rms.select_model_online_clean(
budget=budget,
data_loader=data_loader,
only_phase1=False,
run_workers=1)
return orjson.dumps(
{"best_arch": best_arch,
"best_arch_performance": best_arch_performance,
"time_usage": time_usage}).decode('utf-8')


@exception_catcher
def profiling_filtering_phase(params: dict, args: Namespace):
    from src.logger import logger
    logger.info("begin run profiling_filtering_phase CPU only")
mini_batch_m = params["mini_batch"]
from src.eva_engine.run_ms import RunModelSelection
logger.info(f"begin run filtering phase at {os.getcwd()}, with {mini_batch_m}")
mini_batch_data = json.loads(mini_batch_m)
dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
data_loader = [dataloader, dataloader, dataloader]
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
score_time_per_model = rms.profile_filtering(data_loader=data_loader)
return orjson.dumps({"time": score_time_per_model}).decode('utf-8')


@exception_catcher
def profiling_refinement_phase(params: dict, args: Namespace):
    from src.logger import logger
    logger.info("begin run profiling_refinement_phase CPU only")
mini_batch_m = params["mini_batch"]
from src.eva_engine.run_ms import RunModelSelection
mini_batch_data = json.loads(mini_batch_m)
dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
data_loader = [dataloader, dataloader, dataloader]
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
train_time_per_epoch = rms.profile_refinement(data_loader=data_loader)
return orjson.dumps({"time": train_time_per_epoch}).decode('utf-8')


@exception_catcher
def coordinator(params: dict, args: Namespace):
    from src.logger import logger
    logger.info("begin run coordinator")
    budget = float(params["budget"])
    score_time_per_model = float(params["score_time_per_model"])
    train_time_per_epoch = float(params["train_time_per_epoch"])
    only_phase1 = params["only_phase1"].lower() == "true"
from src.eva_engine.run_ms import RunModelSelection
logger.info(f"coordinator params: budget={budget}, "
f"score_time_per_model={score_time_per_model}, "
f"train_time_per_epoch={train_time_per_epoch}, "
f"only_phase1={only_phase1}")
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
K, U, N = rms.coordination(
budget=budget,
score_time_per_model=score_time_per_model,
train_time_per_epoch=train_time_per_epoch,
only_phase1=only_phase1)
return orjson.dumps(
{"k": K, "u": U, "n": N}).decode('utf-8')


@exception_catcher
def filtering_phase(params: dict, args: Namespace):
    from src.logger import logger
    logger.info("begin run filtering_phase CPU only")
    n = int(params["n"])
    k = int(params["k"])
    from src.eva_engine.run_ms import RunModelSelection
    rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
    k_models, _, _, _ = rms.filtering_phase(N=n, K=k)
return orjson.dumps({"k_models": k_models}).decode('utf-8')


@exception_catcher
def filtering_phase_dataLoader(params: dict, args: Namespace):
    from src.logger import logger
    logger.info("begin run filtering_phase_dataLoader CPU only")
    mini_batch_m = params["mini_batch"]
    n = int(params["n"])
    k = int(params["k"])
from src.eva_engine.run_ms import RunModelSelection
mini_batch_data = json.loads(mini_batch_m)
dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
k_models, _, _, _ = rms.filtering_phase(N=n, K=k, train_loader=dataloader)
return orjson.dumps({"k_models": k_models}).decode('utf-8')


@exception_catcher
def refinement_phase(params: dict, args: Namespace):
    # Placeholder: the refinement phase is served by a separate endpoint
    # (args.refinement_url elsewhere in this file); this stub returns a dummy payload.
    return orjson.dumps(
        {"k_models": "k_models"}).decode('utf-8')


@exception_catcher
def model_selection_workloads(params: dict, args: Namespace):
    """
    Run the filtering phase (explore N models) and the refinement phase
    (refine the top K models) for benchmarking latency.
    """
mini_batch_m = params["mini_batch"]
n = int(params["n"])
k = int(params["k"])
from src.logger import logger
logger.info(f"begin run model_selection_workloads on CPU only, explore N={n} and K={k}")
from src.eva_engine.run_ms import RunModelSelection
mini_batch_data = json.loads(mini_batch_m)
dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
k_models, _, _, _ = rms.filtering_phase(N=n, K=k, train_loader=dataloader)
best_arch, best_arch_performance, _ = rms.refinement_phase(
U=1,
k_models=k_models,
train_loader=dataloader,
valid_loader=dataloader)
return orjson.dumps(
{"best_arch": best_arch,
"best_arch_performance": best_arch_performance,
}).decode('utf-8')


@exception_catcher
def test_io(params: dict, args: Namespace):
    # echo the parsed parameters back; handy for sanity-checking the UDF plumbing
    return orjson.dumps({"inputs are": json.dumps(params)}).decode('utf-8')


@exception_catcher
def model_selection_trails(params: dict, args: Namespace):
    from src.logger import logger
    logger.info("begin run model_selection_trails CPU + GPU")
mini_batch_data = json.loads(params["mini_batch"])
budget = float(params["budget"])
    # 1. launch the cache service for both the train and valid namespaces
columns = list(mini_batch_data[0].keys())
requests.post(args.cache_svc_url,
json={'columns': columns, 'name_space': "train", 'table_name': "dummy",
"batch_size": len(mini_batch_data)})
requests.post(args.cache_svc_url,
json={'columns': columns, 'name_space': "valid", 'table_name': "dummy",
"batch_size": len(mini_batch_data)})
from src.eva_engine.run_ms import RunModelSelection
# 2. profiling & coordination
dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
data_loader = [dataloader, dataloader, dataloader]
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
begin_time = time.time()
score_time_per_model = rms.profile_filtering(data_loader)
train_time_per_epoch = rms.profile_refinement(data_loader)
K, U, N = rms.coordination(budget, score_time_per_model, train_time_per_epoch, False)
# 3. filtering
k_models, all_models, p1_trace_highest_score, p1_trace_highest_scored_models_id = rms.filtering_phase(
N, K, train_loader=data_loader[0])
    # 4. run the refinement phase via the remote refinement service
data = {'u': 1, 'k_models': k_models, "table_name": "dummy", "config_file": args.config_file}
response = requests.post(args.refinement_url, json=data).json()
best_arch, best_arch_performance = response["best_arch"], response["best_arch_performance"]
end_time = time.time()
real_time_usage = end_time - begin_time
return orjson.dumps(
{"best_arch": best_arch,
"best_arch_performance": best_arch_performance,
"time_usage": real_time_usage}).decode('utf-8')


@exception_catcher
def model_selection_trails_workloads(params: dict, args: Namespace):
    """
    Run the filtering phase (explore N models) and the refinement phase
    (refine the top K models) for benchmarking latency.
    """
begin_time = time.time()
mini_batch_data = json.loads(params["mini_batch"])
n = int(params["n"])
k = int(params["k"])
# 1. launch cache service, for both train and valid.
# todo: use real data table or others
columns = list(mini_batch_data[0].keys())
requests.post(args.cache_svc_url,
json={'columns': columns, 'name_space': "train", 'table_name': "dummy",
"batch_size": len(mini_batch_data)})
requests.post(args.cache_svc_url,
json={'columns': columns, 'name_space': "valid", 'table_name': "dummy",
"batch_size": len(mini_batch_data)})
from src.logger import logger
logger.info(f"begin run model_selection_trails_workloads CPU + GPU, explore N={n} and K={k}")
from src.eva_engine.run_ms import RunModelSelection
# 2. filtering
dataloader = generate_dataloader(mini_batch_data=mini_batch_data, args=args)
rms = RunModelSelection(args.search_space, args, is_simulate=args.is_simulate)
k_models, _, _, _ = rms.filtering_phase(N=n, K=k, train_loader=dataloader)
    # 3. run the refinement phase via the remote refinement service
data = {'u': 1, 'k_models': k_models, "table_name": "dummy", "config_file": args.config_file}
response = requests.post(args.refinement_url, json=data).json()
best_arch, best_arch_performance = response["best_arch"], response["best_arch_performance"]
real_time_usage = time.time() - begin_time
return orjson.dumps(
{"best_arch": best_arch,
"best_arch_performance": best_arch_performance,
"time_usage": real_time_usage
}).decode('utf-8')


# benchmarking code here
@exception_catcher
def benchmark_filtering_phase_latency(params: dict, args: Namespace):
from src.logger import logger
from src.common.structure import ModelAcquireData
from src.controller.sampler_all.seq_sampler import SequenceSampler
from src.eva_engine.phase1.evaluator import P1Evaluator
from src.search_space.init_search_space import init_search_space
from src.tools.io_tools import write_json, read_json
from src.tools.res_measure import print_cpu_gpu_usage
import torch
logger.info(f"begin run filtering_phase CPU only")
args.models_explore = int(params["explore_models"])
output_file = f"{args.result_dir}/score_{args.search_space}_{args.dataset}_batch_size_{args.batch_size}_{args.device}_{args.tfmem}.json"
time_output_file = f"{args.result_dir}/time_score_{args.search_space}_{args.dataset}_batch_size_{args.batch_size}_{args.device}_{args.tfmem}.json"
res_output_file = f"{args.result_dir}/resource_score_{args.search_space}_{args.dataset}_batch_size_{args.batch_size}_{args.device}_{args.tfmem}.json"
# start the resource monitor
stop_event, thread = print_cpu_gpu_usage(interval=0.5, output_file=res_output_file)
db_config = {
"db_name": args.db_name,
"db_user": args.db_user,
"db_host": args.db_host,
"db_port": args.db_port,
}
search_space_ins = init_search_space(args)
_evaluator = P1Evaluator(device=args.device,
num_label=args.num_labels,
dataset_name=args.dataset,
search_space_ins=search_space_ins,
train_loader=None,
is_simulate=False,
metrics=args.tfmem,
enable_cache=args.embedding_cache_filtering,
db_config=db_config)
sampler = SequenceSampler(search_space_ins)
explored_n = 0
result = read_json(output_file)
print(f"begin to score all, currently we already explored {len(result.keys())}")
logger.info(f"begin to score all, currently we already explored {len(result.keys())}")
while True:
arch_id, arch_micro = sampler.sample_next_arch()
if arch_id is None:
break
if arch_id in result:
continue
if explored_n > args.models_explore:
break
        # score the sampled model
model_encoding = search_space_ins.serialize_model_encoding(arch_micro)
model_acquire_data = ModelAcquireData(model_id=arch_id,
model_encoding=model_encoding,
is_last=False)
data_str = model_acquire_data.serialize_model()
model_score = _evaluator.p1_evaluate(data_str)
explored_n += 1
result[arch_id] = model_score
if explored_n % 50 == 0:
logger.info(f"Evaluate {explored_n} models")
print(f"Evaluate {explored_n} models")
if _evaluator.if_cuda_avaiable():
torch.cuda.synchronize()
    # the first two measurements are warm-up and are excluded from the sums below
_evaluator.time_usage["io_latency"] = \
sum(_evaluator.time_usage["track_io_model_load"][2:]) + \
sum(_evaluator.time_usage["track_io_model_release_each_50"]) + \
sum(_evaluator.time_usage["track_io_model_init"][2:]) + \
sum(_evaluator.time_usage["track_io_res_load"][2:]) + \
sum(_evaluator.time_usage["track_io_data_retrievel"][2:]) + \
sum(_evaluator.time_usage["track_io_data_preprocess"][2:])
_evaluator.time_usage["compute_latency"] = sum(_evaluator.time_usage["track_compute"][2:])
_evaluator.time_usage["latency"] = _evaluator.time_usage["io_latency"] + _evaluator.time_usage["compute_latency"]
_evaluator.time_usage["avg_compute_latency"] = \
_evaluator.time_usage["compute_latency"] \
/ len(_evaluator.time_usage["track_compute"][2:])
write_json(output_file, result)
    # persist the timing breakdown
write_json(time_output_file, _evaluator.time_usage)
    # stop the resource-monitor thread; sleep first so it can flush its last samples
    print("Done, sleeping 10 seconds so the resource monitor can flush")
    time.sleep(10)
stop_event.set()
thread.join()
return orjson.dumps({"Write to": time_output_file}).decode('utf-8')


# Micro-benchmarking the filtering phase: module-level state shared across
# the stateful UDF calls below.
search_space_ins = None
_evaluator = None
sampler = None


@exception_catcher
def in_db_filtering_state_init(params: dict, args: Namespace):
global search_space_ins, _evaluator, sampler
from src.logger import logger
from src.controller.sampler_all.seq_sampler import SequenceSampler
from src.eva_engine.phase1.evaluator import P1Evaluator
from src.search_space.init_search_space import init_search_space
db_config = {
"db_name": args.db_name,
"db_user": args.db_user,
"db_host": args.db_host,
"db_port": args.db_port,
}
    # init once: params["eva_results"] == "null" means it is a new job
    if params["eva_results"] == "null" or (search_space_ins is None and _evaluator is None and sampler is None):
logger.info(f'New job = {params["eva_results"]}, search_space_ins = {search_space_ins}')
search_space_ins = init_search_space(args)
_evaluator = P1Evaluator(device=args.device,
num_label=args.num_labels,
dataset_name=params["dataset"],
search_space_ins=search_space_ins,
train_loader=None,
is_simulate=False,
metrics=args.tfmem,
enable_cache=args.embedding_cache_filtering,
db_config=db_config,
data_retrievel="spi")
sampler = SequenceSampler(search_space_ins)
arch_id, arch_micro = sampler.sample_next_arch()
model_encoding = search_space_ins.serialize_model_encoding(arch_micro)
return orjson.dumps({"model_encoding": model_encoding, "arch_id": arch_id}).decode('utf-8')


@exception_catcher
def in_db_filtering_evaluate(params: dict, args: Namespace):
global search_space_ins, _evaluator, sampler
from src.common.structure import ModelAcquireData
from src.logger import logger
try:
        if search_space_ins is None and _evaluator is None and sampler is None:
            logger.info("search_space_ins, _evaluator and sampler are None")
            return orjson.dumps({"error": "errored, please call init first"}).decode('utf-8')
sampled_result = json.loads(params["sample_result"])
arch_id, model_encoding = str(sampled_result["arch_id"]), str(sampled_result["model_encoding"])
mini_batch = json.loads(params["mini_batch"])
if mini_batch["status"] == "error":
return orjson.dumps({"error": mini_batch["message"]}).decode('utf-8')
logger.info(f"Begin evaluate {params['model_index']}, "
f"with size of batch = {len(mini_batch['data'])}, "
f"size of columns = {len(mini_batch['data'][0])}")
model_acquire_data = ModelAcquireData(model_id=arch_id,
model_encoding=model_encoding,
is_last=False,
spi_seconds=float(params["spi_seconds"]),
spi_mini_batch=mini_batch["data"],
)
model_score = _evaluator._p1_evaluate_online(model_acquire_data)
logger.info(f'Done evaluate {params["model_index"]}, '
f'with {orjson.dumps({"index": params["model_index"], "score": model_score}).decode("utf-8")}')
    except Exception:
logger.info(orjson.dumps(
{"Errored": traceback.format_exc()}).decode('utf-8'))
return orjson.dumps(
{"Errored": traceback.format_exc()}).decode('utf-8')
return orjson.dumps({"index": params["model_index"], "score": model_score}).decode('utf-8')


@exception_catcher
def records_results(params: dict, args: Namespace):
global search_space_ins, _evaluator, sampler
from src.tools.io_tools import write_json
from src.logger import logger
try:
time_output_file = f"{args.result_dir}/time_score_{args.search_space}_{params['dataset']}_batch_size_{args.batch_size}_{args.device}_{args.tfmem}.json"
_evaluator.time_usage["io_latency"] = \
sum(_evaluator.time_usage["track_io_model_load"][2:]) + \
sum(_evaluator.time_usage["track_io_model_release_each_50"]) + \
sum(_evaluator.time_usage["track_io_model_init"][2:]) + \
sum(_evaluator.time_usage["track_io_res_load"][2:]) + \
sum(_evaluator.time_usage["track_io_data_retrievel"][2:]) + \
sum(_evaluator.time_usage["track_io_data_preprocess"][2:])
_evaluator.time_usage["compute_latency"] = sum(_evaluator.time_usage["track_compute"][2:])
_evaluator.time_usage["latency"] = _evaluator.time_usage["io_latency"] + _evaluator.time_usage[
"compute_latency"]
_evaluator.time_usage["avg_compute_latency"] = \
_evaluator.time_usage["compute_latency"] \
/ len(_evaluator.time_usage["track_compute"][2:])
logger.info(f"Saving time usag to {time_output_file}")
# compute time
write_json(time_output_file, _evaluator.time_usage)
    except Exception:
logger.info(orjson.dumps(
{"Errored": traceback.format_exc()}).decode('utf-8'))
return orjson.dumps(
{"Errored": traceback.format_exc()}).decode('utf-8')
return orjson.dumps({"Done": 1}).decode('utf-8')


if __name__ == "__main__":
    params = {}
params["budget"] = 10
params["score_time_per_model"] = 0.0211558125
params["train_time_per_epoch"] = 5.122203075885773
params["only_phase1"] = 'true'
params["config_file"] = './internal/ml/model_selection/config.ini'
print(coordinator(json.dumps(params)))
params = {}
params[
"mini_batch"] = '[{"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}]'
params["n"] = 10
params["k"] = 1
params["config_file"] = './internal/ml/model_selection/config.ini'
print(filtering_phase_dataLoader(json.dumps(params)))
# params = {}
# params[
# "mini_batch"] = '[{"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}]'
# params["config_file"] = './internal/ml/model_selection/config.ini'
# print(profiling_refinement_phase(json.dumps(params)))
#
# params = {}
# params["budget"] = 10
# params[
# "mini_batch"] = '[{"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"1"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}, {"col1":"123:123","col2":"123:123","col3":"123:123","label":"0"}]'
# params["config_file"] = './internal/ml/model_selection/config.ini'
# print(model_selection(json.dumps(params)))