blob: 58f161a3ce22495b9234df86b89858b192aae683 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# The lm-function solves linear regression using either the direct solve
# method or the conjugate gradient algorithm depending on the number of
# columns of X: direct solve for at most 1024 columns, conjugate gradient
# otherwise (See lmDS-function and lmCG-function respectively).
#
# .. code-block:: python
#
# >>> import numpy as np
# >>> from systemds.context import SystemDSContext
# >>> from systemds.operator.algorithm import lm
# >>>
# >>> np.random.seed(0)
# >>> features = np.random.rand(10, 15)
# >>> y = np.random.rand(10, 1)
# >>>
# >>> with SystemDSContext() as sds:
# ... weights = lm(sds.from_numpy(features), sds.from_numpy(y)).compute()
# ... print(weights)
# [[-0.11538199]
# [-0.20386541]
# [-0.39956034]
# [ 1.04078623]
# [ 0.43270839]
# [ 0.18954599]
# [ 0.49858969]
# [-0.26812763]
# [ 0.09961844]
# [-0.57000751]
# [-0.43386048]
# [ 0.55358873]
# [-0.54638565]
# [ 0.2205885 ]
# [ 0.37957689]]
#
#
# INPUT:
# --------------------------------------------------------------------
# X Matrix of feature vectors.
# y 1-column matrix of response values.
# icpt Intercept presence, shifting and rescaling the columns of X
# reg Regularization constant (lambda) for L2-regularization. Set to nonzero
# for highly dependent/sparse/numerous features
# tol Tolerance (epsilon); conjugate gradient procedure terminates early if L2
# norm of the beta-residual is less than tolerance * its initial norm
# maxi Maximum number of conjugate gradient iterations. 0 = no maximum
# verbose If TRUE, print messages are activated
# --------------------------------------------------------------------
#
# OUTPUT:
# ---------------------------------------------------------------
# B The model fit beta that can be used as input in lmPredict
# ---------------------------------------------------------------
m_lm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
    Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE)
  return (Matrix[Double] B) {
  # Solver selection is driven solely by the number of feature columns:
  # inputs with more than 1024 columns are handed to the conjugate gradient
  # solver (the only path that consumes tol and maxi); all other inputs are
  # handed to the direct solver.
  if( ncol(X) > 1024 ) {
    B = lmCG(X, y, icpt, reg, tol, maxi, verbose)
  } else {
    B = lmDS(X, y, icpt, reg, verbose)
  }
}