blob: d81c2158a92c868831ed4dc927f80e82736ee0d7 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
#
# THIS SCRIPT SOLVES CUBIC SPLINE INTERPOLATION USING THE DIRECT SOLVER
#
# INPUT PARAMETERS:
# --------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# --------------------------------------------------------------------------------------------
# X String --- Location (on HDFS) to read the 1-column matrix of x values knots
# Y String --- Location (on HDFS) to read the 1-column matrix of corresponding y values knots
# K String --- Location to store the $k_{i}$ -file for the calculated k vectors. the default is to print it to the standard output
# O String --- Location to store the output predicted y the default is to print it to the standard output
# inp_x Double --- the given input x, for which the cspline will find predicted y.
#
# fmt String "text" Matrix file output format, such as `text`,`mm`, or `csv`
# --------------------------------------------------------------------------------------------
# OUTPUT: Matrix of k parameters (the betas)
#
# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
# hadoop jar SystemML.jar -f CsplineDS.dml -nvargs X=INPUT_DIR/X Y= INPUT_DIR/Y O=OUTPUT_DIR/Out
# -inp_x=4.5 fmt=csv
#Assumptions:
# - The inputs xs are monotonically increasing,
# - there is no duplicates points in x
#Algorithms: It implement the https://en.wikipedia.org/wiki/Spline_interpolation#Algorithm_to_find_the_interpolating_cubic_spline
#it usages natural spline with q1''(x0) == qn''(xn) == 0.0
##BEGIN Main Func
fileX = $X;
fileY = $Y;
fileK = $K;
fileO = ifdef($O, " ");
inp_x = $inp_x
fmtO = ifdef ($fmt, "text");
print ("BEGIN CUBIC SPLINE SCRIPT");
print ("Reading X and Y ...");
xs = read (fileX);
ys = read (fileY);
print("Calculating Ks ...")
ks = xs
ks = calcKnotsDerivKs(xs, ys)
print("Writing Ks ...")
write (ks, fileK, format=fmtO);
print("Interpolating ...")
x = inp_x
y = interpSpline(x, xs, ys, ks)
print("For inp_x = " + $inp_x + " Calculated y = " + y)
y_mat = matrix(y, 1, 1)
if (fileO != " ") {
write (y_mat, fileO);
} else {
print(y)
}
print ("END CUBIC SPLINE REGRESSION SCRIPT");
##END Main Func
#given X<nx1> and corresponding Y<nx1> values for the function. where each (xi,yi) represents a knot.
#it calculates the first derivates i.e k of the interpolated polynomial
calcKnotsDerivKs = function (
matrix[double] X, matrix[double] Y
) return (matrix[double] K) {
nx = nrow(X)
ny = nrow(Y)
if (nx != ny) {
stop("X and Y vectors are of different size")
}
Xu = trunc(X, 1, "up") # Xu is (X where X[0] is removed)
Xd = trunc(X, 1, "down") # Xd is (X where X[n] is removed)
Bx=1/(Xu-Xd) # The expr => 1/Delta(X(i) = 1/(X(i)-X(i-1))
Bxd = resize(Bx, 1, 0, "tr") # Bxd is (0, Bx) vector
Bxu = resize(Bx, 1, 0, "br") # Bxu is (Bx, 0) vector
Dx = 2*(Bxd + Bxu) # the major diagonal entry 2(1/Delta(X(i) + 1/Delta(X(i+1)))
MDx = diag(Dx) # convert vector to diagonal matrix
MBx = diag(Bx) # this is the temp diagonal matrix, which will form the bands of the tri-diagonal matrix
MUx = resize(MBx, 1, 1, "bl") # the upper diagonal matrix of the band
MLx = resize(MBx, 1, 1, "tr") # the lower diagonal matrix of the band
A=MUx+MDx+MLx # create the complete tri-diagonal matrix
#calculate b matrix
Yu = trunc(Y, 1, "up") # Yu is (Y where Y[0] is removed)
Yd = trunc(Y, 1, "down") # Yd is (Y where Y[n] is removed)
By=(Yu-Yd)/(Bx*Bx) # the expr => Delta(Y(i))/Delta(X(i))*Delta(X(i))
By1=resize(By, 1, 0, "tr") # By1 is (0, By) vector
By2=resize(By, 1, 0, "br") # By2 is (By, 0) vector
b=3*(By1+By2) # the b entries 3*(Delta(Y(i))/Delta(X(i))*Delta(X(i)) + Delta(Y(i+1))/Delta(X(i+1))*Delta(X(i+1)))
K = solve(A, b) /* solve Ax = b for x vector and assign it to K*/
}
#given the X<nx1> and Y<nx1> n sample points and K (the first derivative of the interp polynomial), it calculate the
# y for the given x using the cubic spline interpolation
interpSpline = function(
double x, matrix[double] X, matrix[double] Y, matrix[double] K
) return (double q) {
#first find the right knots for interpolation
i = as.integer(nrow(X) - sum(X >= x) + 1)
#calc the y as per the algo docs
t = (x - X[i-1,1]) / ( X[i,1] - X[i-1,1])
a = K[i-1,1]*(X[i,1]-X[i-1,1]) - (Y[i,1]-Y[i-1,1])
b = -K[i,1]*(X[i,1]-X[i-1,1]) + (Y[i,1]-Y[i-1,1])
qm = (1-t)*Y[i-1,1] + t*Y[i,1] + t*(1-t)*(a*(1-t)+b*t)
q = as.scalar(qm)
}
#
# trunc the matrix by the specified amount in the specified direction.
# The shifted cells are discarded, the matrix is smaller in size
#
trunc = function (
matrix[double] X, # nxm matrix
int by, # shift amount
String dir # currently only 'up' is supported. but should handle 'up', 'down', 'left', 'right'
) return (matrix[double] Y) # Y is
{
r = nrow(X); c = ncol(X);
if (by > r) {
stop("can't pop matrix more than number of rows")
}
Y = matrix(0.0, r-by, c)
if (r != by ) {
if (dir == "up") {
Y[1:r-by,] = X[1+by:r,]
} else if (dir == "down") {
Y[1:r-by,] = X[1:r-by,]
} else {
stop("trunc unsupported direction " + dir)
}
}
}
# resize (only grow and not truncate) the matrix by the specified amount in the specified direction
resize = function(
matrix[double] X, #nxm matrix
int rby, # row resize count
int cby, # col resize count
String dir
) return (matrix[double] Y) # Y is
{
r = nrow(X); c = ncol(X);
rn = r + rby; cn = c + cby;
Y = matrix(0.0, rn, cn)
if (dir == "tr") { # top right
Y[1+rby:rn, 1:c] = X
} else if (dir == "bl") { # bottom left
Y[1:r, 1+cby:cn] = X
} else if (dir == "tl") { # top left
Y[1+rby:rn, 1+cby:cn ] = X
} else if (dir == "br") { # bottom right
Y[1:r, 1:c] = X
} else {
stop("Unknown direction dir => " + dir)
}
}