scripts/builtin/lm.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 # The lm-function solves linear regression using either the direct solve
 # method or the conjugate gradient algorithm depending on the number of
 # columns of X: lmDS for at most 1024 columns, lmCG otherwise (see the
 # lmDS-function and lmCG-function respectively).
 #
 # .. code-block:: python
 #
 #   >>> import numpy as np
 #   >>> from systemds.context import SystemDSContext
 #   >>> from systemds.operator.algorithm import lm
 #   >>>
 #   >>> np.random.seed(0)
 #   >>> features = np.random.rand(10, 15)
 #   >>> y = np.random.rand(10, 1)
 #   >>>
 #   >>> with SystemDSContext() as sds:
 #   ...     weights = lm(sds.from_numpy(features), sds.from_numpy(y)).compute()
 #   ...     print(weights)
 #   [[-0.11538199]
 #    [-0.20386541]
 #    [-0.39956034]
 #    [ 1.04078623]
 #    [ 0.43270839]
 #    [ 0.18954599]
 #    [ 0.49858969]
 #    [-0.26812763]
 #    [ 0.09961844]
 #    [-0.57000751]
 #    [-0.43386048]
 #    [ 0.55358873]
 #    [-0.54638565]
 #    [ 0.2205885 ]
 #    [ 0.37957689]]
 #
 #
 # INPUT:
 # --------------------------------------------------------------------
 # X        Matrix of feature vectors.
 # y        1-column matrix of response values.
 # icpt     Intercept presence, shifting and rescaling the columns of X
 # reg      Regularization constant (lambda) for L2-regularization. Set to nonzero
 #          for highly dependent/sparse/numerous features
 # tol      Tolerance (epsilon); conjugate gradient procedure terminates early if L2
 #          norm of the beta-residual is less than tolerance * its initial norm
 # maxi     Maximum number of conjugate gradient iterations. 0 = no maximum
 # verbose  If TRUE print messages are activated
 # --------------------------------------------------------------------
 #
 # OUTPUT:
 # ---------------------------------------------------------------
 # B     The model fit beta that can be used as input in lmPredict
 # ---------------------------------------------------------------

 m_lm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
     Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE)
     return (Matrix[Double] B) {
   # Dispatch on the number of feature columns of X: inputs with more than
   # 1024 columns go to lmCG, which is the only branch that receives tol and
   # maxi; all other inputs go to lmDS.
   numFeatures = ncol(X)
   if( numFeatures > 1024 ) {
     B = lmCG(X, y, icpt, reg, tol, maxi, verbose)
   }
   else {
     B = lmDS(X, y, icpt, reg, verbose)
   }
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	# The lm-function solves linear regression using either the direct solve
	# method or the conjugate gradient algorithm depending on the number of
	# columns of X: lmDS for at most 1024 columns, lmCG otherwise (see the
	# lmDS-function and lmCG-function respectively).
	#
	# .. code-block:: python
	#
	# >>> import numpy as np
	# >>> from systemds.context import SystemDSContext
	# >>> from systemds.operator.algorithm import lm
	# >>>
	# >>> np.random.seed(0)
	# >>> features = np.random.rand(10, 15)
	# >>> y = np.random.rand(10, 1)
	# >>>
	# >>> with SystemDSContext() as sds:
	# ... weights = lm(sds.from_numpy(features), sds.from_numpy(y)).compute()
	# ... print(weights)
	# [[-0.11538199]
	# [-0.20386541]
	# [-0.39956034]
	# [ 1.04078623]
	# [ 0.43270839]
	# [ 0.18954599]
	# [ 0.49858969]
	# [-0.26812763]
	# [ 0.09961844]
	# [-0.57000751]
	# [-0.43386048]
	# [ 0.55358873]
	# [-0.54638565]
	# [ 0.2205885 ]
	# [ 0.37957689]]
	#
	#
	# INPUT:
	# --------------------------------------------------------------------
	# X Matrix of feature vectors.
	# y 1-column matrix of response values.
	# icpt Intercept presence, shifting and rescaling the columns of X
	# reg Regularization constant (lambda) for L2-regularization. Set to nonzero
	# for highly dependent/sparse/numerous features
	# tol Tolerance (epsilon); conjugate gradient procedure terminates early if L2
	# norm of the beta-residual is less than tolerance * its initial norm
	# maxi Maximum number of conjugate gradient iterations. 0 = no maximum
	# verbose If TRUE print messages are activated
	# --------------------------------------------------------------------
	#
	# OUTPUT:
	# ---------------------------------------------------------------
	# B The model fit beta that can be used as input in lmPredict
	# ---------------------------------------------------------------

	m_lm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
	    Double reg = 1e-7, Double tol = 1e-7, Integer maxi = 0, Boolean verbose = TRUE)
	    return (Matrix[Double] B) {
	  # Dispatch on the number of feature columns of X: inputs with more than
	  # 1024 columns go to lmCG, which is the only branch that receives tol and
	  # maxi; all other inputs go to lmDS.
	  numFeatures = ncol(X)
	  if( numFeatures > 1024 ) {
	    B = lmCG(X, y, icpt, reg, tol, maxi, verbose)
	  }
	  else {
	    B = lmDS(X, y, icpt, reg, verbose)
	  }
	}