| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| import json |
| import os |
| import time |
| import uuid |
| |
| from typing import Any, Dict, Generic, List, Optional, Sequence, Type, TypeVar, cast, TYPE_CHECKING |
| |
| |
| from pyspark import SparkContext, since |
| from pyspark.ml.common import inherit_doc |
| from pyspark.sql import SparkSession |
| from pyspark.util import VersionUtils |
| |
| if TYPE_CHECKING: |
| from py4j.java_gateway import JavaGateway, JavaObject |
| from pyspark.ml._typing import PipelineStage |
| from pyspark.ml.base import Params |
| from pyspark.ml.wrapper import JavaWrapper |
| |
# Unconstrained type variable; parameterizes HasTrainingSummary (the summary type).
T = TypeVar("T")
# Self-type for BaseReadWrite.session(), so the fluent call returns the caller's own type.
RW = TypeVar("RW", bound="BaseReadWrite")
# Type variables bounded by the writer classes (MLWriter and JavaMLWriter respectively).
W = TypeVar("W", bound="MLWriter")
JW = TypeVar("JW", bound="JavaMLWriter")
# Type of the instance produced by a reader / readable (see MLReader[RL], MLReadable[RL]).
RL = TypeVar("RL", bound="MLReadable")
# Self-type for JavaMLReader.session(), playing the same role as RW for the Java-backed reader.
JR = TypeVar("JR", bound="JavaMLReader")
| |
| |
def _jvm() -> "JavaGateway":
    """
    Fetch the JVM view held by :py:class:`SparkContext`.

    Only meaningful once a SparkContext has been initialized.

    Raises
    ------
    AttributeError
        If ``SparkContext._jvm`` is not set (falsy).
    """
    gateway = SparkContext._jvm
    # Guard clause: without a JVM view there is nothing useful to hand back.
    if not gateway:
        raise AttributeError("Cannot load _jvm from SparkContext. Is SparkContext initialized?")
    return gateway
| |
| |
class Identifiable:
    """
    Mixin that gives an object a unique string identifier (``uid``).
    """

    def __init__(self) -> None:
        #: Unique identifier assigned when the object is constructed.
        self.uid = self._randomUID()

    def __repr__(self) -> str:
        # The uid doubles as the object's textual representation.
        return self.uid

    @classmethod
    def _randomUID(cls) -> str:
        """
        Build a fresh identifier of the form ``<ClassName>_<12 hex chars>``.

        The hex suffix is the tail of a random UUID4, so subclasses get ids
        prefixed with their own class name.
        """
        suffix = uuid.uuid4().hex[-12:]
        return f"{cls.__name__}_{suffix}"
| |
| |
@inherit_doc
class BaseReadWrite:
    """
    Common parent of MLWriter and MLReader. Keeps track of the SparkSession
    (and, through it, the SparkContext) used for saving/loading.

    .. versionadded:: 2.3.0
    """

    def __init__(self) -> None:
        # No session is bound until session() is called or sparkSession is first read.
        self._sparkSession: Optional[SparkSession] = None

    def session(self: RW, sparkSession: SparkSession) -> RW:
        """
        Sets the Spark Session to use for saving/loading and returns this
        object so calls can be chained.
        """
        self._sparkSession = sparkSession
        return self

    @property
    def sparkSession(self) -> SparkSession:
        """
        Returns the user-specified Spark Session, falling back to the active
        (or a newly created) session, which is then remembered.
        """
        current = self._sparkSession
        if current is None:
            current = SparkSession._getActiveSessionOrCreate()
            self._sparkSession = current
        assert current is not None
        return current

    @property
    def sc(self) -> SparkContext:
        """
        Returns the `SparkContext` underlying :py:attr:`sparkSession`.
        """
        spark = self.sparkSession
        assert spark is not None
        return spark.sparkContext
| |
| |
@inherit_doc
class MLWriter(BaseReadWrite):
    """
    Utility class that can save ML instances.

    .. versionadded:: 2.0.0
    """

    def __init__(self) -> None:
        super().__init__()
        # Whether save() should clear an existing output path first.
        self.shouldOverwrite: bool = False
        # Writer options; keys are stored lower-cased, values as strings (see option()).
        self.optionMap: Dict[str, Any] = {}

    def _handleOverwrite(self, path: str) -> None:
        """Delegate overwrite handling for `path` to the JVM-side FileSystemOverwrite helper."""
        from pyspark.ml.wrapper import JavaWrapper

        overwriteHelper = JavaWrapper(
            JavaWrapper._new_java_obj("org.apache.spark.ml.util.FileSystemOverwrite")
        )
        overwriteHelper._call_java(
            "handleOverwrite", path, True, self.sparkSession._jsparkSession
        )

    def save(self, path: str) -> None:
        """Save the ML instance to the input path, handling overwrite first if requested."""
        if self.shouldOverwrite:
            self._handleOverwrite(path)
        self.saveImpl(path)

    def saveImpl(self, path: str) -> None:
        """
        Hook invoked by save() once overwriting has been dealt with. Subclasses
        override this to perform the actual persistence of the instance.
        """
        raise NotImplementedError("MLWriter is not yet implemented for type: %s" % type(self))

    def overwrite(self) -> "MLWriter":
        """Overwrites if the output path already exists. Returns this writer."""
        self.shouldOverwrite = True
        return self

    def option(self, key: str, value: Any) -> "MLWriter":
        """
        Adds an option to the underlying MLWriter. See the documentation for the specific model's
        writer for possible options. The option name (key) is case-insensitive: it is stored
        lower-cased, and the value is stored as a string.
        """
        normalizedKey = key.lower()
        self.optionMap[normalizedKey] = str(value)
        return self
| |
| |
@inherit_doc
class GeneralMLWriter(MLWriter):
    """
    Utility class that can save ML instances in different formats.

    .. versionadded:: 2.4.0
    """

    def format(self, source: str) -> "GeneralMLWriter":
        """
        Specifies the format of ML export ("pmml", "internal", or the fully qualified class
        name for export). The chosen format is stored on ``self.source``; returns this writer.
        """
        # Note: ``source`` only exists as an attribute once format() has been called.
        self.source = source
        return self
| |
| |
@inherit_doc
class JavaMLWriter(MLWriter):
    """
    (Private) Specialization of :py:class:`MLWriter` for :py:class:`JavaParams` types
    """

    # Java-side writer that every operation below is forwarded to.
    _jwrite: "JavaObject"

    def __init__(self, instance: "JavaMLWritable"):
        super().__init__()
        # Convert the Python instance to its Java peer and grab that peer's writer.
        javaPeer = instance._to_java()  # type: ignore[attr-defined]
        self._jwrite = javaPeer.write()

    def save(self, path: str) -> None:
        """Save the ML instance to the input path via the Java writer."""
        if isinstance(path, str):
            self._jwrite.save(path)
            return
        raise TypeError("path should be a string, got type %s" % type(path))

    def overwrite(self) -> "JavaMLWriter":
        """Overwrites if the output path already exists."""
        self._jwrite.overwrite()
        return self

    def option(self, key: str, value: str) -> "JavaMLWriter":
        """Forwards the option to the Java writer unchanged and returns this writer."""
        self._jwrite.option(key, value)
        return self

    def session(self, sparkSession: SparkSession) -> "JavaMLWriter":
        """Sets the Spark Session to use for saving."""
        jSession = sparkSession._jsparkSession
        self._jwrite.session(jSession)
        return self
| |
| |
@inherit_doc
class GeneralJavaMLWriter(JavaMLWriter):
    """
    (Private) Specialization of :py:class:`GeneralMLWriter` for :py:class:`JavaParams` types
    """

    def __init__(self, instance: "JavaMLWritable"):
        super().__init__(instance)

    def format(self, source: str) -> "GeneralJavaMLWriter":
        """
        Specifies the format of ML export ("pmml", "internal", or the fully qualified class
        name for export). The choice is forwarded to the Java writer; returns this writer.
        """
        self._jwrite.format(source)
        return self
| |
| |
@inherit_doc
class MLWritable:
    """
    Mixin for ML instances that provide :py:class:`MLWriter`.

    .. versionadded:: 2.0.0
    """

    def write(self) -> MLWriter:
        """Returns an MLWriter instance for this ML instance. Subclasses must override."""
        raise NotImplementedError("MLWritable is not yet implemented for type: %r" % type(self))

    def save(self, path: str) -> None:
        """Save this ML instance to the given path, a shortcut of 'write().save(path)'."""
        writer = self.write()
        writer.save(path)
| |
| |
@inherit_doc
class JavaMLWritable(MLWritable):
    """
    (Private) Mixin for ML instances that provide :py:class:`JavaMLWriter`.
    """

    def write(self) -> JavaMLWriter:
        """Returns a :py:class:`JavaMLWriter` bound to this ML instance."""
        writer = JavaMLWriter(self)
        return writer
| |
| |
@inherit_doc
class GeneralJavaMLWritable(JavaMLWritable):
    """
    (Private) Mixin for ML instances that provide :py:class:`GeneralJavaMLWriter`.
    """

    def write(self) -> GeneralJavaMLWriter:
        """Returns a :py:class:`GeneralJavaMLWriter` bound to this ML instance."""
        writer = GeneralJavaMLWriter(self)
        return writer
| |
| |
@inherit_doc
class MLReader(BaseReadWrite, Generic[RL]):
    """
    Utility class that can load ML instances.

    .. versionadded:: 2.0.0
    """

    def __init__(self) -> None:
        super().__init__()

    def load(self, path: str) -> RL:
        """Load the ML instance from the input path. Subclasses must override."""
        raise NotImplementedError("MLReader is not yet implemented for type: %s" % type(self))
| |
| |
@inherit_doc
class JavaMLReader(MLReader[RL]):
    """
    (Private) Specialization of :py:class:`MLReader` for :py:class:`JavaParams` types
    """

    def __init__(self, clazz: Type["JavaMLReadable[RL]"]) -> None:
        super().__init__()
        self._clazz = clazz
        # Resolve the Java peer class and obtain its reader up front.
        javaPeerClass = self._load_java_obj(clazz)
        self._jread = javaPeerClass.read()

    def load(self, path: str) -> RL:
        """Load the ML instance from the input path."""
        if not isinstance(path, str):
            raise TypeError("path should be a string, got type %s" % type(path))
        # The Java-side load runs first; the Python conversion check follows it.
        loaded = self._jread.load(path)
        if hasattr(self._clazz, "_from_java"):
            return self._clazz._from_java(loaded)  # type: ignore[attr-defined]
        raise NotImplementedError(
            "This Java ML type cannot be loaded into Python currently: %r" % self._clazz
        )

    def session(self: JR, sparkSession: SparkSession) -> JR:
        """Sets the Spark Session to use for loading."""
        jSession = sparkSession._jsparkSession
        self._jread.session(jSession)
        return self

    @classmethod
    def _java_loader_class(cls, clazz: Type["JavaMLReadable[RL]"]) -> str:
        """
        Returns the full class name of the Java ML instance. The default
        implementation replaces "pyspark" by "org.apache.spark" in
        the Python full class name.
        """
        className = clazz.__name__
        packageName = clazz.__module__.replace("pyspark", "org.apache.spark")
        if className in ("Pipeline", "PipelineModel"):
            # Remove the last package name "pipeline" for Pipeline and PipelineModel.
            packageParts = packageName.split(".")
            packageName = ".".join(packageParts[:-1])
        return packageName + "." + className

    @classmethod
    def _load_java_obj(cls, clazz: Type["JavaMLReadable[RL]"]) -> "JavaObject":
        """Load the peer Java object of the ML instance."""
        qualifiedName = cls._java_loader_class(clazz)
        # Walk the dotted name one attribute at a time, starting from the JVM view.
        node = _jvm()
        for segment in qualifiedName.split("."):
            node = getattr(node, segment)
        return node
| |
| |
@inherit_doc
class MLReadable(Generic[RL]):
    """
    Mixin for instances that provide :py:class:`MLReader`.

    .. versionadded:: 2.0.0
    """

    @classmethod
    def read(cls) -> MLReader[RL]:
        """Returns an MLReader instance for this class. Subclasses must override."""
        raise NotImplementedError("MLReadable.read() not implemented for type: %r" % cls)

    @classmethod
    def load(cls, path: str) -> RL:
        """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""
        reader = cls.read()
        return reader.load(path)
| |
| |
@inherit_doc
class JavaMLReadable(MLReadable[RL]):
    """
    (Private) Mixin for instances that provide JavaMLReader.
    """

    @classmethod
    def read(cls) -> JavaMLReader[RL]:
        """Returns a :py:class:`JavaMLReader` instance for this class."""
        reader = JavaMLReader(cls)
        return reader
| |
| |
@inherit_doc
class DefaultParamsWritable(MLWritable):
    """
    Helper trait for making simple :py:class:`Params` types writable.  If a :py:class:`Params`
    class stores all data as :py:class:`Param` values, then extending this trait will provide
    a default implementation of writing saved instances of the class.
    This only handles simple :py:class:`Param` types; e.g., it will not handle
    :py:class:`pyspark.sql.DataFrame`. See :py:class:`DefaultParamsReadable`, the counterpart
    to this class.

    .. versionadded:: 2.3.0
    """

    def write(self) -> MLWriter:
        """
        Returns a DefaultParamsWriter instance for this class.

        Raises
        ------
        TypeError
            If this object is not a :py:class:`Params` instance.
        """
        # Imported lazily, as in the rest of this module, to avoid a circular import.
        from pyspark.ml.param import Params

        if isinstance(self, Params):
            return DefaultParamsWriter(self)
        else:
            # Interpolate the type into the message. Passing it as a second TypeError
            # argument would leave a literal "%s" in the text and turn the exception's
            # args into a (template, type) tuple.
            raise TypeError(
                (
                    "Cannot use DefaultParamsWritable with type %s because it does not "
                    " extend Params."
                )
                % (type(self),)
            )
| |
| |
@inherit_doc
class DefaultParamsWriter(MLWriter):
    """
    Specialization of :py:class:`MLWriter` for :py:class:`Params` types

    Class for writing Estimators and Transformers whose parameters are JSON-serializable.

    .. versionadded:: 2.3.0
    """

    def __init__(self, instance: "Params"):
        super().__init__()
        # The Params object whose metadata saveImpl() writes out.
        self.instance = instance

    def saveImpl(self, path: str) -> None:
        """Write this writer's instance metadata under `path`."""
        DefaultParamsWriter.saveMetadata(self.instance, path, self.sc)

    @staticmethod
    def extractJsonParams(instance: "Params", skipParams: Sequence[str]) -> Dict[str, Any]:
        """
        Return ``{param name: value}`` for the instance's extracted param map,
        leaving out every param whose name appears in `skipParams`.
        """
        jsonParams = {}
        for param, value in instance.extractParamMap().items():
            if param.name not in skipParams:
                jsonParams[param.name] = value
        return jsonParams

    @staticmethod
    def saveMetadata(
        instance: "Params",
        path: str,
        sc: SparkContext,
        extraMetadata: Optional[Dict[str, Any]] = None,
        paramMap: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Saves metadata + Params to: path + "/metadata"

        - class
        - timestamp
        - sparkVersion
        - uid
        - paramMap
        - defaultParamMap (since 2.4.0)
        - (optionally, extra metadata)

        Parameters
        ----------
        extraMetadata : dict, optional
            Extra metadata to be saved at same level as uid, paramMap, etc.
        paramMap : dict, optional
            If given, this is saved in the "paramMap" field.
        """
        serialized = DefaultParamsWriter._get_metadata_to_save(
            instance, sc, extraMetadata, paramMap
        )
        target = os.path.join(path, "metadata")
        # A single-partition RDD holding the one JSON line is written as a text file.
        sc.parallelize([serialized], 1).saveAsTextFile(target)

    @staticmethod
    def _get_metadata_to_save(
        instance: "Params",
        sc: SparkContext,
        extraMetadata: Optional[Dict[str, Any]] = None,
        paramMap: Optional[Dict[str, Any]] = None,
    ) -> str:
        """
        Helper for :py:meth:`DefaultParamsWriter.saveMetadata` which extracts the JSON to save.
        This is useful for ensemble models which need to save metadata for many sub-models.

        Notes
        -----
        See :py:meth:`DefaultParamsWriter.saveMetadata` for details on what this includes.
        """
        uid = instance.uid
        qualifiedClassName = f"{instance.__module__}.{instance.__class__.__name__}"

        # User-supplied param values: an explicit paramMap wins over the instance's own.
        userParams = instance._paramMap
        if paramMap is not None:
            jsonParams = paramMap
        else:
            jsonParams = {p.name: userParams[p] for p in userParams}

        # Default param values
        defaults = instance._defaultParamMap
        jsonDefaultParams = {p.name: defaults[p] for p in defaults}

        # Key order here determines the field order of the emitted JSON.
        basicMetadata = {
            "class": qualifiedClassName,
            "timestamp": int(round(time.time() * 1000)),
            "sparkVersion": sc.version,
            "uid": uid,
            "paramMap": jsonParams,
            "defaultParamMap": jsonDefaultParams,
        }
        if extraMetadata is not None:
            # Extra entries live at the top level and may replace the basic ones.
            basicMetadata.update(extraMetadata)
        return json.dumps(basicMetadata, separators=(",", ":"))
| |
| |
@inherit_doc
class DefaultParamsReadable(MLReadable[RL]):
    """
    Helper trait for making simple :py:class:`Params` types readable.
    If a :py:class:`Params` class stores all data as :py:class:`Param` values,
    then extending this trait will provide a default implementation of reading saved
    instances of the class. This only handles simple :py:class:`Param` types;
    e.g., it will not handle :py:class:`pyspark.sql.DataFrame`. See
    :py:class:`DefaultParamsWritable`, the counterpart to this class.

    .. versionadded:: 2.3.0
    """

    @classmethod
    def read(cls) -> "DefaultParamsReader[RL]":
        """Returns a :py:class:`DefaultParamsReader` instance for this class."""
        reader = DefaultParamsReader(cls)
        return reader
| |
| |
@inherit_doc
class DefaultParamsReader(MLReader[RL]):
    """
    Specialization of :py:class:`MLReader` for :py:class:`Params` types

    Default :py:class:`MLReader` implementation for transformers and estimators that
    contain basic (json-serializable) params and no data. This will not handle
    more complex params or types with data (e.g., models with coefficients).

    .. versionadded:: 2.3.0
    """

    def __init__(self, cls: Type[DefaultParamsReadable[RL]]):
        super(DefaultParamsReader, self).__init__()
        # The readable class this reader was created for.
        self.cls = cls

    @staticmethod
    def __get_class(clazz: str) -> Type[RL]:
        """
        Loads Python class from its fully qualified (dotted) name.
        """
        parts = clazz.split(".")
        module = ".".join(parts[:-1])
        # A non-empty fromlist makes __import__ return the leaf module, not the top package.
        m = __import__(module, fromlist=[parts[-1]])
        return getattr(m, parts[-1])

    def load(self, path: str) -> RL:
        """
        Load the instance saved at `path`: read its metadata, instantiate the
        recorded class with no arguments, restore its uid, then apply the saved params.
        """
        metadata = DefaultParamsReader.loadMetadata(path, self.sc)
        py_type: Type[RL] = DefaultParamsReader.__get_class(metadata["class"])
        instance = py_type()
        cast("Params", instance)._resetUid(metadata["uid"])
        DefaultParamsReader.getAndSetParams(instance, metadata)
        return instance

    @staticmethod
    def loadMetadata(path: str, sc: SparkContext, expectedClassName: str = "") -> Dict[str, Any]:
        """
        Load metadata saved using :py:meth:`DefaultParamsWriter.saveMetadata`

        Parameters
        ----------
        path : str
        sc : :py:class:`pyspark.SparkContext`
        expectedClassName : str, optional
            If non empty, this is checked against the loaded metadata.
        """
        metadataPath = os.path.join(path, "metadata")
        # Only the first line of the metadata text file is parsed.
        metadataStr = sc.textFile(metadataPath, 1).first()
        return DefaultParamsReader._parseMetaData(metadataStr, expectedClassName)

    @staticmethod
    def _parseMetaData(metadataStr: str, expectedClassName: str = "") -> Dict[str, Any]:
        """
        Parse metadata JSON string produced by
        :py:meth:`DefaultParamsWriter._get_metadata_to_save`.
        This is a helper function for :py:meth:`DefaultParamsReader.loadMetadata`.

        Parameters
        ----------
        metadataStr : str
            JSON string of metadata
        expectedClassName : str, optional
            If non empty, this is checked against the loaded metadata.

        Raises
        ------
        AssertionError
            If `expectedClassName` is non empty and differs from the stored class name.
        """
        metadata = json.loads(metadataStr)
        className = metadata["class"]
        # Raised explicitly rather than via `assert`, so the check is not stripped when
        # Python runs with -O; the exception type and message are unchanged.
        if expectedClassName and className != expectedClassName:
            raise AssertionError(
                "Error loading metadata: Expected "
                + "class name {} but found class name {}".format(expectedClassName, className)
            )
        return metadata

    @staticmethod
    def getAndSetParams(
        instance: RL, metadata: Dict[str, Any], skipParams: Optional[List[str]] = None
    ) -> None:
        """
        Extract Params from metadata, and set them in the instance.

        Parameters
        ----------
        instance
            Object to receive the params; it is used as a :py:class:`Params`.
        metadata : dict
            Parsed metadata with "paramMap", "sparkVersion" and, for Spark 2.4+,
            "defaultParamMap" entries.
        skipParams : list of str, optional
            Names in "paramMap" that must not be set on the instance.

        Raises
        ------
        AssertionError
            If the metadata comes from Spark 2.4 or later but has no "defaultParamMap".
        """
        params = cast("Params", instance)

        # Set user-supplied param values
        for paramName, paramValue in metadata["paramMap"].items():
            # The param is resolved before the skip check, so skipped names are looked up too.
            param = params.getParam(paramName)
            if skipParams is None or paramName not in skipParams:
                params.set(param, paramValue)

        # Set default param values
        major, minor = VersionUtils.majorMinorVersion(metadata["sparkVersion"])[:2]

        # For metadata file prior to Spark 2.4, there is no default section.
        if major > 2 or (major == 2 and minor >= 4):
            # Explicit raise (not `assert`) so the validation also runs under -O.
            if "defaultParamMap" not in metadata:
                raise AssertionError(
                    "Error loading metadata: Expected " + "`defaultParamMap` section not found"
                )

            for paramName, paramValue in metadata["defaultParamMap"].items():
                params._setDefault(**{paramName: paramValue})

    @staticmethod
    def isPythonParamsInstance(metadata: Dict[str, Any]) -> bool:
        """Return True if the metadata's class name starts with ``pyspark.ml.``."""
        return metadata["class"].startswith("pyspark.ml.")

    @staticmethod
    def loadParamsInstance(path: str, sc: SparkContext) -> RL:
        """
        Load a :py:class:`Params` instance from the given path, and return it.
        This assumes the instance inherits from :py:class:`MLReadable`.
        """
        metadata = DefaultParamsReader.loadMetadata(path, sc)
        if DefaultParamsReader.isPythonParamsInstance(metadata):
            pythonClassName = metadata["class"]
        else:
            # Otherwise map the recorded org.apache.spark class name to its pyspark counterpart.
            pythonClassName = metadata["class"].replace("org.apache.spark", "pyspark")
        py_type: Type[RL] = DefaultParamsReader.__get_class(pythonClassName)
        instance = py_type.load(path)
        return instance
| |
| |
@inherit_doc
class HasTrainingSummary(Generic[T]):
    """
    Base class for models that provides Training summary.

    .. versionadded:: 3.0.0
    """

    @property  # type: ignore[misc]
    @since("2.1.0")
    def hasSummary(self) -> bool:
        """
        Indicates whether a training summary exists for this model
        instance. The answer comes from the Java model's ``hasSummary``.
        """
        javaModel = cast("JavaWrapper", self)
        return javaModel._call_java("hasSummary")

    @property  # type: ignore[misc]
    @since("2.1.0")
    def summary(self) -> T:
        """
        Gets summary of the model trained on the training set. An exception is thrown if
        no summary exists.
        """
        javaModel = cast("JavaWrapper", self)
        return javaModel._call_java("summary")
| |
| |
class MetaAlgorithmReadWrite:
    """
    Static helpers for persisting meta-algorithms, i.e. stages that contain other stages
    (Pipeline, PipelineModel, OneVsRest, OneVsRestModel, validators).
    """

    @staticmethod
    def isMetaEstimator(pyInstance: Any) -> bool:
        """
        Return True if `pyInstance` is a Pipeline, a OneVsRest, or an Estimator that
        also carries validator params.
        """
        from pyspark.ml import Estimator, Pipeline
        from pyspark.ml.tuning import _ValidatorParams
        from pyspark.ml.classification import OneVsRest

        return (
            isinstance(pyInstance, Pipeline)
            or isinstance(pyInstance, OneVsRest)
            or (isinstance(pyInstance, Estimator) and isinstance(pyInstance, _ValidatorParams))
        )

    @staticmethod
    def getAllNestedStages(pyInstance: Any) -> List["Params"]:
        """
        Return `pyInstance` followed by all stages nested inside it, recursively
        (depth-first, in stage order).

        Raises
        ------
        ValueError
            If `pyInstance` or any nested stage is a validator (has validator params).
        """
        from pyspark.ml import Pipeline, PipelineModel
        from pyspark.ml.tuning import _ValidatorParams
        from pyspark.ml.classification import OneVsRest, OneVsRestModel

        # TODO: We need to handle `RFormulaModel.pipelineModel` here after Pyspark RFormulaModel
        #  support pipelineModel property.
        pySubStages: Sequence["Params"]

        if isinstance(pyInstance, Pipeline):
            pySubStages = pyInstance.getStages()
        elif isinstance(pyInstance, PipelineModel):
            pySubStages = cast(List["PipelineStage"], pyInstance.stages)
        elif isinstance(pyInstance, _ValidatorParams):
            raise ValueError("PySpark does not support nested validator.")
        elif isinstance(pyInstance, OneVsRest):
            pySubStages = [pyInstance.getClassifier()]
        elif isinstance(pyInstance, OneVsRestModel):
            pySubStages = [pyInstance.getClassifier()] + pyInstance.models  # type: ignore[operator]
        else:
            # Anything else is treated as a leaf with no sub-stages.
            pySubStages = []

        nestedStages = []
        for pySubStage in pySubStages:
            nestedStages.extend(MetaAlgorithmReadWrite.getAllNestedStages(pySubStage))

        return [pyInstance] + nestedStages

    @staticmethod
    def getUidMap(instance: Any) -> Dict[str, "Params"]:
        """
        Map the uid of `instance` and of each of its nested stages to the stage itself.

        Raises
        ------
        RuntimeError
            If two or more of those stages share a uid.
        """
        nestedStages = MetaAlgorithmReadWrite.getAllNestedStages(instance)
        uidMap = {stage.uid: stage for stage in nestedStages}
        if len(nestedStages) != len(uidMap):
            # Report the uid of every stage: the keys of uidMap are already
            # de-duplicated, so listing them would hide the duplicates being reported.
            allUids = [stage.uid for stage in nestedStages]
            raise RuntimeError(
                f"{instance.__class__.__module__}.{instance.__class__.__name__}"
                f".load found a compound estimator with stages with duplicate "
                f"UIDs. List of UIDs: {allUids}."
            )
        return uidMap