| # ------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| # ------------------------------------------------------------- |
| |
| from typing import Dict |
| |
| from systemds.operator import OperationNode |
| from systemds.script_building.dag import DAGNode |
| from systemds.utils.consts import VALID_INPUT_TYPES |
| |
# Names exported by a star-import of this module.
__all__ = [
    'l2svm',
    'lm',
    'kmeans',
    'pca',
]
| |
| |
def l2svm(x: DAGNode, y: DAGNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Build the `l2svm` operation for a dataset and its labels.

    :param x: Input dataset
    :param y: Input labels in shape of one column
    :param kwargs: Extra named arguments forwarded to the operation
    :return: `OperationNode` containing the model fit.
    """
    x._check_matrix_op()
    # Caller-supplied keyword arguments are merged last, so they take
    # precedence over the 'X' / 'Y' entries if the same key is passed.
    named_inputs = {'X': x, 'Y': y, **kwargs}
    return OperationNode(x.sds_context, 'l2svm', named_input_nodes=named_inputs)
| |
| |
def lm(x: DAGNode, y: DAGNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Build the `lm` operation for a dataset and its labels.

    :param x: Input dataset
    :param y: Input labels in shape of one column
    :param kwargs: Extra named arguments forwarded to the operation
    :return: `OperationNode` containing the model fit.
    :raises ValueError: if the array backing `x` or `y` has no elements.
    """
    x._check_matrix_op()

    # Reject empty inputs up front; x is validated before y.
    for operand in (x, y):
        if operand._np_array.size == 0:
            raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required."
                             .format(s=operand._np_array.shape))

    # Caller-supplied keyword arguments are merged last and win on key clashes.
    named_inputs = {'X': x, 'y': y, **kwargs}
    return OperationNode(x.sds_context, 'lm', named_input_nodes=named_inputs)
| |
| |
def kmeans(x: DAGNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Build the `kmeans` operation for a matrix input.

    :param x: Input dataset to perform K-Means on.
    :param k: The number of centroids to use for the algorithm.
    :param runs: The number of concurrent instances of K-Means to run (with different initial centroids).
    :param max_iter: The maximum number of iterations to run the K-Means algorithm for.
    :param eps: Tolerance for the algorithm to declare convergence using WCSS change ratio.
    :param is_verbose: Boolean flag if the algorithm should be run in a verbose manner.
    :param avg_sample_size_per_centroid: The average number of records per centroid in the data samples.
    :return: `OperationNode` for the `kmeans` operation.
    :raises ValueError: if the array backing `x` has no elements, or `k` is given and is below 1.
    """
    x._check_matrix_op()

    if x._np_array.size == 0:
        raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required."
                         .format(s=x._np_array.shape))

    # 'k' is optional; it is only validated when the caller supplies it.
    if 'k' in kwargs and kwargs['k'] < 1:
        raise ValueError("Invalid number of clusters in K means, number must be integer above 0")

    # Caller-supplied keyword arguments are merged last and win on key clashes.
    named_inputs = {'X': x, **kwargs}
    return OperationNode(x.sds_context, 'kmeans', named_input_nodes=named_inputs)
| |
| |
def pca(x: DAGNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Performs PCA on the matrix input.

    :param x: Input dataset to perform PCA on.
    :param K: The number of reduced dimensions.
    :param center: Boolean specifying if the input values should be centered.
    :param scale: Boolean specifying if the input values should be scaled.
    :return: `OperationNode` for the `pca` operation.
    :raises ValueError: if the array backing `x` has no elements, or `K` is given and is below 1.
    """
    x._check_matrix_op()
    if x._np_array.size == 0:
        raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required."
                         .format(s=x._np_array.shape))

    # 'K' is optional; it is only validated when the caller supplies it.
    if 'K' in kwargs and kwargs['K'] < 1:
        raise ValueError("Invalid number of dimensions in PCA, number must be integer above 0")

    # Python booleans for these flags are replaced by the strings
    # "TRUE" / "FALSE" before being handed to the operation.
    # NOTE(review): presumably this is the boolean spelling the backend
    # expects -- confirm against how OperationNode renders named arguments.
    # Non-boolean values (and absent flags) are passed through untouched.
    for flag in ('scale', 'center'):
        value = kwargs.get(flag)
        if isinstance(value, bool):
            kwargs[flag] = "TRUE" if value else "FALSE"

    # Caller-supplied keyword arguments are merged last and win on key clashes.
    params_dict = {'X': x}
    params_dict.update(kwargs)
    return OperationNode(x.sds_context, 'pca', named_input_nodes=params_dict)
| |