# blob: 9a05193d3c7eff7c3bd1ee90626de5ec7bde111b [file] [log] [blame]
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------
# Autogenerated By : src/main/python/generator/generator.py
# Autogenerated From : scripts/builtin/tomeklink.dml
from typing import Dict, Iterable
from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
def tomeklink(X: Matrix,
              y: Matrix):
    """
    Build the operation node for the SystemDS builtin ``tomeklink``.

    The builtin under-samples imbalanced multi-class data by computing
    Tomek links and dropping them from the data matrix and label vector.
    Only the majority-label point of each Tomek link is dropped.

    :param X: Data Matrix (nxm)
    :param y: Label Matrix (nx1), greater than zero
    :return: Multi-return operation node with three matrix outputs, in order:
        the data matrix without Tomek links, the labels corresponding to the
        under-sampled data, and the indices of dropped rows/labels with
        respect to the input
    """
    sds = X.sds_context
    # One placeholder matrix node (empty operation string) per value that
    # the builtin returns.
    outputs = [Matrix(sds, '') for _ in range(3)]
    multi_op = MultiReturn(sds, 'tomeklink', outputs,
                           named_input_nodes={'X': X, 'y': y})
    # Register the multi-return operation as the sole unnamed input of
    # every output node.
    for placeholder in outputs:
        placeholder._unnamed_input_nodes = [multi_op]
    return multi_op