| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| # |
| # THIS SCRIPT SOLVES CUBIC SPLINE INTERPOLATION USING THE DIRECT SOLVER |
| # |
| # INPUT PARAMETERS: |
| # -------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # -------------------------------------------------------------------------------------------- |
| # X String --- Location (on HDFS) to read the 1-column matrix of x values knots |
| # Y String --- Location (on HDFS) to read the 1-column matrix of corresponding y values knots |
| # K String --- Location to store the $k_{i}$ -file for the calculated k vectors. the default is to print it to the standard output |
| # O String --- Location to store the output predicted y the default is to print it to the standard output |
| # inp_x Double --- the given input x, for which the cspline will find predicted y. |
| # |
| # fmt String "text" Matrix file output format, such as `text`,`mm`, or `csv` |
| # -------------------------------------------------------------------------------------------- |
| # OUTPUT: Matrix of k parameters (the betas) |
| # |
| # HOW TO INVOKE THIS SCRIPT - EXAMPLE: |
| # hadoop jar SystemDS.jar -f CsplineDS.dml -nvargs X=INPUT_DIR/X Y= INPUT_DIR/Y O=OUTPUT_DIR/Out |
| # -inp_x=4.5 fmt=csv |
| |
| |
| |
| |
| #Assumptions: |
| # - The inputs xs are monotonically increasing, |
| # - there is no duplicates points in x |
| |
| #Algorithms: It implement the https://en.wikipedia.org/wiki/Spline_interpolation#Algorithm_to_find_the_interpolating_cubic_spline |
| #it usages natural spline with q1''(x0) == qn''(xn) == 0.0 |
| |
| |
| |
| ##BEGIN Main Func |
| fileX = $X; |
| fileY = $Y; |
| fileK = $K; |
| fileO = ifdef($O, " "); |
| inp_x = $inp_x |
| fmtO = ifdef ($fmt, "text"); |
| |
| print ("BEGIN CUBIC SPLINE SCRIPT"); |
| |
| print ("Reading X and Y ..."); |
| xs = read (fileX); |
| ys = read (fileY); |
| |
| print("Calculating Ks ...") |
| ks = xs |
| ks = calcKnotsDerivKs(xs, ys) |
| print("Writing Ks ...") |
| write (ks, fileK, format=fmtO); |
| |
| print("Interpolating ...") |
| x = inp_x |
| y = interpSpline(x, xs, ys, ks) |
| |
| print("For inp_x = " + $inp_x + " Calculated y = " + y) |
| |
| y_mat = matrix(y, 1, 1) |
| |
| if (fileO != " ") { |
| write (y_mat, fileO); |
| } else { |
| print(y) |
| } |
| |
| |
| print ("END CUBIC SPLINE REGRESSION SCRIPT"); |
| |
| ##END Main Func |
| |
| |
| |
| #given X<nx1> and corresponding Y<nx1> values for the function. where each (xi,yi) represents a knot. |
| #it calculates the first derivates i.e k of the interpolated polynomial |
| calcKnotsDerivKs = function ( |
| matrix[double] X, matrix[double] Y |
| ) return (matrix[double] K) { |
| nx = nrow(X) |
| ny = nrow(Y) |
| if (nx != ny) { |
| stop("X and Y vectors are of different size") |
| } |
| |
| Xu = trunc(X, 1, "up") # Xu is (X where X[0] is removed) |
| Xd = trunc(X, 1, "down") # Xd is (X where X[n] is removed) |
| |
| Bx=1/(Xu-Xd) # The expr => 1/Delta(X(i) = 1/(X(i)-X(i-1)) |
| |
| |
| Bxd = resize(Bx, 1, 0, "tr") # Bxd is (0, Bx) vector |
| Bxu = resize(Bx, 1, 0, "br") # Bxu is (Bx, 0) vector |
| Dx = 2*(Bxd + Bxu) # the major diagonal entry 2(1/Delta(X(i) + 1/Delta(X(i+1))) |
| |
| MDx = diag(Dx) # convert vector to diagonal matrix |
| |
| MBx = diag(Bx) # this is the temp diagonal matrix, which will form the bands of the tri-diagonal matrix |
| MUx = resize(MBx, 1, 1, "bl") # the upper diagonal matrix of the band |
| MLx = resize(MBx, 1, 1, "tr") # the lower diagonal matrix of the band |
| |
| A=MUx+MDx+MLx # create the complete tri-diagonal matrix |
| |
| #calculate b matrix |
| Yu = trunc(Y, 1, "up") # Yu is (Y where Y[0] is removed) |
| Yd = trunc(Y, 1, "down") # Yd is (Y where Y[n] is removed) |
| By=(Yu-Yd)/(Bx*Bx) # the expr => Delta(Y(i))/Delta(X(i))*Delta(X(i)) |
| |
| By1=resize(By, 1, 0, "tr") # By1 is (0, By) vector |
| By2=resize(By, 1, 0, "br") # By2 is (By, 0) vector |
| b=3*(By1+By2) # the b entries 3*(Delta(Y(i))/Delta(X(i))*Delta(X(i)) + Delta(Y(i+1))/Delta(X(i+1))*Delta(X(i+1))) |
| |
| |
| K = solve(A, b) /* solve Ax = b for x vector and assign it to K*/ |
| |
| } |
| |
| #given the X<nx1> and Y<nx1> n sample points and K (the first derivative of the interp polynomial), it calculate the |
| # y for the given x using the cubic spline interpolation |
| interpSpline = function( |
| double x, matrix[double] X, matrix[double] Y, matrix[double] K |
| ) return (double q) { |
| |
| #first find the right knots for interpolation |
| i = as.integer(nrow(X) - sum(X >= x) + 1) |
| |
| #calc the y as per the algo docs |
| t = (x - X[i-1,1]) / ( X[i,1] - X[i-1,1]) |
| |
| a = K[i-1,1]*(X[i,1]-X[i-1,1]) - (Y[i,1]-Y[i-1,1]) |
| b = -K[i,1]*(X[i,1]-X[i-1,1]) + (Y[i,1]-Y[i-1,1]) |
| |
| qm = (1-t)*Y[i-1,1] + t*Y[i,1] + t*(1-t)*(a*(1-t)+b*t) |
| |
| q = as.scalar(qm) |
| |
| } |
| |
| # |
| # trunc the matrix by the specified amount in the specified direction. |
| # The shifted cells are discarded, the matrix is smaller in size |
| # |
| trunc = function ( |
| matrix[double] X, # nxm matrix |
| int by, # shift amount |
| String dir # currently only 'up' is supported. but should handle 'up', 'down', 'left', 'right' |
| ) return (matrix[double] Y) # Y is |
| { |
| r = nrow(X); c = ncol(X); |
| if (by > r) { |
| stop("can't pop matrix more than number of rows") |
| } |
| Y = matrix(0.0, r-by, c) |
| |
| if (r != by ) { |
| if (dir == "up") { |
| Y[1:r-by,] = X[1+by:r,] |
| } else if (dir == "down") { |
| Y[1:r-by,] = X[1:r-by,] |
| } else { |
| stop("trunc unsupported direction " + dir) |
| } |
| } |
| } |
| |
| # resize (only grow and not truncate) the matrix by the specified amount in the specified direction |
| resize = function( |
| matrix[double] X, #nxm matrix |
| int rby, # row resize count |
| int cby, # col resize count |
| String dir |
| ) return (matrix[double] Y) # Y is |
| { |
| r = nrow(X); c = ncol(X); |
| rn = r + rby; cn = c + cby; |
| Y = matrix(0.0, rn, cn) |
| if (dir == "tr") { # top right |
| Y[1+rby:rn, 1:c] = X |
| } else if (dir == "bl") { # bottom left |
| Y[1:r, 1+cby:cn] = X |
| } else if (dir == "tl") { # top left |
| Y[1+rby:rn, 1+cby:cn ] = X |
| } else if (dir == "br") { # bottom right |
| Y[1:r, 1:c] = X |
| } else { |
| stop("Unknown direction dir => " + dir) |
| } |
| } |