blob: 45527c8eceabcd7a6eff447900248e91505a091c [file] [log] [blame]
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
import os
from typing import Dict, Any
from pyflink.table import StreamTableEnvironment
from pyflink.ml.core.api import Stage
from pyflink.ml.core.linalg import Vectors
from pyflink.ml.core.param import ParamValidators, Param, BooleanParam, IntParam, \
FloatParam, StringParam, VectorParam, IntArrayParam, FloatArrayParam, StringArrayParam
from pyflink.ml.tests.test_utils import PyFlinkMLTestCase
BOOLEAN_PARAM = BooleanParam("boolean_param", "Description", False)
INT_PARAM = IntParam("int_param", "Description", 1, ParamValidators.lt(100))
FLOAT_PARAM = FloatParam("float_param", "Description", 3.0, ParamValidators.lt(100))
STRING_PARAM = StringParam('string_param', "Description", "5")
INT_ARRAY_PARAM = IntArrayParam("int_array_param", "Description", (6, 7))
FLOAT_ARRAY_PARAM = FloatArrayParam("float_array_param", "Description", (10.0, 11.0))
STRING_ARRAY_PARAM = StringArrayParam("string_array_param", "Description", ("14", "15"))
VECTOR_PARAM = VectorParam('vector_param', "Description", Vectors.dense(1, 2, 3))
EXTRA_INT_PARAM = IntParam("extra_int_param",
"Description",
20,
ParamValidators.always_true())
PARAM_WITH_NONE_DEFAULT = IntParam("param_with_none_default",
"Must be explicitly set with a non-none value",
None,
ParamValidators.not_null())
class StageTest(PyFlinkMLTestCase):
def test_param_set_value_with_name(self):
stage = MyStage()
stage.set(INT_PARAM, 2)
self.assertEqual(2, stage.get(INT_PARAM))
dense_vec = Vectors.dense(2, 2)
stage.set(VECTOR_PARAM, dense_vec)
self.assertEqual(dense_vec.get(0), stage.get(VECTOR_PARAM).get(0))
self.assertEqual(dense_vec.get(1), stage.get(VECTOR_PARAM).get(1))
sparse_vec = Vectors.sparse(3, [0, 2], [2, 2])
stage.set(VECTOR_PARAM, sparse_vec)
self.assertEqual(sparse_vec.get(0), stage.get(VECTOR_PARAM).get(0))
self.assertEqual(sparse_vec.get(1), stage.get(VECTOR_PARAM).get(1))
self.assertEqual(sparse_vec.get(2), stage.get(VECTOR_PARAM).get(2))
param = stage.get_param("int_param")
stage.set(param, 3)
self.assertEqual(3, stage.get(param))
param = stage.get_param('extra_int_param')
stage.set(param, 50)
self.assertEqual(50, stage.get(param))
def test_param_with_null_default(self):
stage = MyStage()
import pytest
with pytest.raises(ValueError, match='value should not be None'):
stage.get(PARAM_WITH_NONE_DEFAULT)
stage.set(PARAM_WITH_NONE_DEFAULT, 3)
self.assertEqual(3, stage.get(PARAM_WITH_NONE_DEFAULT))
def test_param_set_invalid_value(self):
stage = MyStage()
import pytest
with pytest.raises(ValueError, match='Parameter int_param is given an invalid value 100.'):
stage.set(INT_PARAM, 100)
with pytest.raises(ValueError,
match='Parameter float_param is given an invalid value 100.0.'):
stage.set(FLOAT_PARAM, 100.0)
with pytest.raises(TypeError,
match="Parameter int_param's type <class 'int'> is incompatible with "
"the type of <class 'str'>"):
stage.set(INT_PARAM, "100")
with pytest.raises(TypeError,
match="Parameter string_param's type <class 'str'> is incompatible with"
" the type of <class 'int'>"):
stage.set(STRING_PARAM, 100)
with pytest.raises(TypeError,
match="Parameter vector_param's type <class 'pyflink.ml.core.linalg"
".Vector'> is incompatible with the type of <class 'int'>"):
stage.set(VECTOR_PARAM, 100)
def test_param_set_valid_value(self):
stage = MyStage()
stage.set(BOOLEAN_PARAM, True)
self.assertTrue(stage.get(BOOLEAN_PARAM))
stage.set(INT_PARAM, 50)
self.assertEqual(50, stage.get(INT_PARAM))
stage.set(FLOAT_PARAM, 50.0)
self.assertEqual(50.0, stage.get(FLOAT_PARAM))
stage.set(STRING_PARAM, "50")
self.assertEqual("50", stage.get(STRING_PARAM))
dense_vec = Vectors.dense(2, 2)
stage.set(VECTOR_PARAM, dense_vec)
self.assertEqual(dense_vec.get(0), stage.get(VECTOR_PARAM).get(0))
self.assertEqual(dense_vec.get(1), stage.get(VECTOR_PARAM).get(1))
sparse_vec = Vectors.sparse(3, [0, 2], [2, 2])
stage.set(VECTOR_PARAM, sparse_vec)
self.assertEqual(sparse_vec.get(0), stage.get(VECTOR_PARAM).get(0))
self.assertEqual(sparse_vec.get(1), stage.get(VECTOR_PARAM).get(1))
self.assertEqual(sparse_vec.get(2), stage.get(VECTOR_PARAM).get(2))
stage.set(INT_ARRAY_PARAM, (50, 51))
self.assertEqual((50, 51), stage.get(INT_ARRAY_PARAM))
stage.set(FLOAT_ARRAY_PARAM, (50.0, 51.0))
self.assertEqual((50.0, 51.0), stage.get(FLOAT_ARRAY_PARAM))
stage.set(STRING_ARRAY_PARAM, ("50", "51"))
self.assertEqual(("50", "51"), stage.get(STRING_ARRAY_PARAM))
def test_stage_save_load(self):
stage = MyStage()
stage.set(PARAM_WITH_NONE_DEFAULT, 1)
path = os.path.join(self.temp_dir, "test_stage_save_load")
stage.save(path)
loaded_stage = MyStage.load(self.env, path)
self.assertEqual(stage.get_param_map(), loaded_stage.get_param_map())
self.assertEqual(1, loaded_stage.get(INT_PARAM))
def test_validators(self):
gt = ParamValidators.gt(10)
self.assertFalse(gt.validate(None))
self.assertFalse(gt.validate(5))
self.assertFalse(gt.validate(10))
self.assertTrue(gt.validate(15))
gt_eq = ParamValidators.gt_eq(10)
self.assertFalse(gt_eq.validate(None))
self.assertFalse(gt_eq.validate(5))
self.assertTrue(gt_eq.validate(10))
self.assertTrue(gt_eq.validate(15))
lt = ParamValidators.lt(10)
self.assertFalse(lt.validate(None))
self.assertTrue(lt.validate(5))
self.assertFalse(lt.validate(10))
self.assertFalse(lt.validate(15))
lt_eq = ParamValidators.lt_eq(10)
self.assertFalse(lt_eq.validate(None))
self.assertTrue(lt_eq.validate(5))
self.assertTrue(lt_eq.validate(10))
self.assertFalse(lt_eq.validate(15))
in_range_inclusive = ParamValidators.in_range(5, 15)
self.assertFalse(in_range_inclusive.validate(None))
self.assertFalse(in_range_inclusive.validate(0))
self.assertTrue(in_range_inclusive.validate(5))
self.assertTrue(in_range_inclusive.validate(10))
self.assertTrue(in_range_inclusive.validate(15))
self.assertFalse(in_range_inclusive.validate(20))
in_range_exclusive = ParamValidators.in_range(5, 15, False, False)
self.assertFalse(in_range_exclusive.validate(None))
self.assertFalse(in_range_exclusive.validate(0))
self.assertFalse(in_range_exclusive.validate(5))
self.assertTrue(in_range_exclusive.validate(10))
self.assertFalse(in_range_exclusive.validate(15))
self.assertFalse(in_range_exclusive.validate(20))
in_array = ParamValidators.in_array([1, 2, 3])
self.assertFalse(in_array.validate(None))
self.assertTrue(in_array.validate(1))
self.assertFalse(in_array.validate(0))
not_null = ParamValidators.not_null()
self.assertTrue(not_null.validate(5))
self.assertFalse(not_null.validate(None))
class MyStage(Stage):
def __init__(self):
self._param_map = {} # type: Dict[Param, Any]
self._init_param()
def save(self, path: str) -> None:
from pyflink.ml.util import read_write_utils
read_write_utils.save_metadata(self, path)
@classmethod
def load(cls, t_env: StreamTableEnvironment, path: str):
from pyflink.ml.util import read_write_utils
return read_write_utils.load_stage_param(path)
def get_param_map(self) -> Dict['Param[Any]', Any]:
return self._param_map
def _init_param(self):
self._param_map[BOOLEAN_PARAM] = BOOLEAN_PARAM.default_value
self._param_map[INT_PARAM] = INT_PARAM.default_value
self._param_map[FLOAT_PARAM] = FLOAT_PARAM.default_value
self._param_map[STRING_PARAM] = STRING_PARAM.default_value
self._param_map[VECTOR_PARAM] = VECTOR_PARAM.default_value
self._param_map[INT_ARRAY_PARAM] = INT_ARRAY_PARAM.default_value
self._param_map[FLOAT_ARRAY_PARAM] = FLOAT_ARRAY_PARAM.default_value
self._param_map[STRING_ARRAY_PARAM] = STRING_ARRAY_PARAM.default_value
self._param_map[EXTRA_INT_PARAM] = EXTRA_INT_PARAM.default_value
self._param_map[PARAM_WITH_NONE_DEFAULT] = PARAM_WITH_NONE_DEFAULT.default_value