| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| # |
| # THIS SCRIPT SOLVES CUBIC SPLINE INTERPOLATION USING THE CONJUGATE GRADIENT ALGORITHM |
| # |
| # INPUT PARAMETERS: |
| # -------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # -------------------------------------------------------------------------------------------- |
| # X String --- Location (on HDFS) to read the 1-column matrix of x values knots |
| # Y String --- Location (on HDFS) to read the 1-column matrix of corresponding y values knots |
| # K String --- Location to store the $k_{i}$ -file for the calculated k vectors. the default is to print it to the standard output |
| # O String --- Location to store the output predicted y the default is to print it to the standard output |
| # inp_x Double --- the given input x, for which the cspline will find predicted y. |
| # Log String " " Location to store iteration-specific variables for monitoring and debugging purposes |
| # |
| # tol Double 0.000001 Tolerance (epsilon); conjugate graduent procedure terminates early if |
| # L2 norm of the beta-residual is less than tolerance * its initial norm |
| # maxi Int 0 Maximum number of conjugate gradient iterations, 0 = no maximum |
| # fmt String "text" Matrix file output format, such as `text`,`mm`, or `csv` |
| # -------------------------------------------------------------------------------------------- |
| # OUTPUT: Matrix of k parameters |
| # |
| # The Log file, when requested, contains the following per-iteration variables in CSV |
| # format, each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for |
| # initial values: |
| # |
| # NAME MEANING |
| # ------------------------------------------------------------------------------------- |
| # CG_RESIDUAL_NORM L2-norm of Conj.Grad.residual, which is A %*% beta - t(X) %*% y |
| # where A = t(X) %*% X + diag (lambda), or a similar quantity |
| # CG_RESIDUAL_RATIO Ratio of current L2-norm of Conj.Grad.residual over the initial |
| # ------------------------------------------------------------------------------------- |
| # |
| # HOW TO INVOKE THIS SCRIPT - EXAMPLE: |
| # hadoop jar SystemDS.jar -f CsplineCG.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y K=OUTPUT_DIR/K O=OUTPUT_DIR/Out |
| # tol=0.001 maxi=100 fmt=csv Log=OUTPUT_DIR/log |
| |
| |
| |
| |
| #Assumptions: |
| # - The inputs xs are monotonically increasing, |
| # - there is no duplicates points in x |
| |
| #Algorithms: It implement the https://en.wikipedia.org/wiki/Spline_interpolation#Algorithm_to_find_the_interpolating_cubic_spline |
| #it usages natural spline with q1''(x0) == qn''(xn) == 0.0 |
| |
| |
| |
| ##BEGIN Main Func |
| fileX = $X; |
| fileY = $Y; |
| fileK = $K; |
| fileO = ifdef($O, " "); |
| inp_x = $inp_x |
| fileLog = ifdef ($Log, " "); |
| fmtO = ifdef ($fmt, "csv"); |
| |
| tolerance = ifdef ($tol, 0.000001); # $tol=0.000001; |
| max_iteration = ifdef ($maxi, 0); # $maxi=0; |
| |
| print ("BEGIN CUBIC SPLINE SCRIPT"); |
| |
| print ("Reading X and Y ..."); |
| xs = read (fileX); |
| ys = read (fileY); |
| |
| print("Calculating Ks ...") |
| ks = xs |
| ks = calcKnotsDerivKs(xs, ys, |
| max_iteration, tolerance, fileLog) |
| print("Writing Ks ...") |
| write (ks, fileO, format=fmtO); |
| |
| print("Interpolating ...") |
| x = inp_x |
| y = interpSpline(x, xs, ys, ks) |
| |
| print("For inp_x = " + $inp_x + " Calculated y = " + y) |
| |
| y_mat = matrix(y, 1, 1) |
| |
| if (fileO != " ") { |
| write (y_mat, fileO); |
| } else { |
| print(y) |
| } |
| |
| print ("END CUBIC SPLINE REGRESSION SCRIPT"); |
| |
| ##END Main Func |
| |
| |
| |
| #given X<nx1> and corresponding Y<nx1> values for the function. where each (xi,yi) represents a knot. |
| #it calculates the first derivates i.e k of the interpolated polynomial |
| calcKnotsDerivKs = function ( |
| matrix[double] X, matrix[double] Y, |
| int max_iteration, |
| double tolerance, |
| String fileLog |
| ) return (matrix[double] K) { |
| |
| nx = nrow(X) |
| ny = nrow(Y) |
| if (nx != ny) { |
| stop("X and Y vectors are of different size") |
| } |
| |
| Xu = trunc(X, 1, "up") # Xu is (X where X[0] is removed) |
| Xd = trunc(X, 1, "down") # Xd is (X where X[n] is removed) |
| |
| Bx=1/(Xu-Xd) # The expr => 1/Delta(X(i) = 1/(X(i)-X(i-1)) |
| |
| |
| Bxd = resize(Bx, 1, 0, "tr") # Bxd is (0, Bx) vector |
| Bxu = resize(Bx, 1, 0, "br") # Bxu is (Bx, 0) vector |
| Dx = 2*(Bxd + Bxu) # the major diagonal entry 2(1/Delta(X(i) + 1/Delta(X(i+1))) |
| |
| MDx = diag(Dx) # convert vector to diagonal matrix |
| |
| MBx = diag(Bx) # this is the temp diagonal matrix, which will form the bands of the tri-diagonal matrix |
| MUx = resize(MBx, 1, 1, "bl") # the upper diagonal matrix of the band |
| MLx = resize(MBx, 1, 1, "tr") # the lower diagonal matrix of the band |
| |
| A=MUx+MDx+MLx # create the complete tri-diagonal matrix |
| |
| #calculate b matrix |
| Yu = trunc(Y, 1, "up") # Yu is (Y where Y[0] is removed) |
| Yd = trunc(Y, 1, "down") # Yd is (Y where Y[n] is removed) |
| By=(Yu-Yd)/(Bx*Bx) # the expr => Delta(Y(i))/Delta(X(i))*Delta(X(i)) |
| |
| By1=resize(By, 1, 0, "tr") # By1 is (0, By) vector |
| By2=resize(By, 1, 0, "br") # By2 is (By, 0) vector |
| b=3*(By1+By2) # the b entries 3*(Delta(Y(i))/Delta(X(i))*Delta(X(i)) + Delta(Y(i+1))/Delta(X(i+1))*Delta(X(i+1))) |
| |
| |
| # solve Ax = b for x vector and assign it to K |
| K = CGSolver(A, b, |
| max_iteration, tolerance, fileLog) |
| |
| } |
| |
| #given the X<nx1> and Y<nx1> n sample points and K (the first derivative of the interp polynomial), it calculate the |
| # y for the given x using the cubic spline interpolation |
| interpSpline = function( |
| double x, matrix[double] X, matrix[double] Y, matrix[double] K |
| ) return (double q) { |
| |
| #first find the right knots for interpolation |
| i = as.integer(nrow(X) - sum(X >= x) + 1) |
| |
| #calc the y as per the algo docs |
| t = (x - X[i-1,1]) / ( X[i,1] - X[i-1,1]) |
| |
| a = K[i-1,1]*(X[i,1]-X[i-1,1]) - (Y[i,1]-Y[i-1,1]) |
| b = -K[i,1]*(X[i,1]-X[i-1,1]) + (Y[i,1]-Y[i-1,1]) |
| |
| qm = (1-t)*Y[i-1,1] + t*Y[i,1] + t*(1-t)*(a*(1-t)+b*t) |
| |
| q = as.scalar(qm) |
| |
| } |
| |
| |
| #solve Ax = b |
| # for CG our formulation is |
| # t(A)Ax = t(A)b // where t is transpose |
| CGSolver = function ( |
| matrix[double] A, matrix[double] b, |
| int max_iteration, |
| double tolerance, |
| String fileLog |
| ) return (matrix[double] x) { |
| |
| n = nrow(A) |
| if (max_iteration < 0) { |
| max_iteration = n |
| } |
| if (tolerance < 0) { |
| tolerance = 0.000001 |
| } |
| |
| x = matrix (0, rows = n, cols = 1); #/* solution vector x<nx1>*/ |
| |
| # BEGIN THE CONJUGATE GRADIENT ALGORITHM |
| print ("Running the CG algorithm..."); |
| |
| i = 0 |
| r = -t(A) %*% b # initial guess x0 = t(0.0) |
| p = -r |
| norm_r2 = sum (r ^ 2) |
| norm_r2_initial = norm_r2 |
| norm_r2_target = norm_r2_initial * tolerance ^ 2 |
| print ("||r|| initial value = " + sqrt (norm_r2_initial) + ", target value = " + sqrt (norm_r2_target)); |
| log_str = "CG_RESIDUAL_NORM,0," + sqrt (norm_r2_initial) |
| log_str = append (log_str, "CG_RESIDUAL_RATIO,0,1.0") |
| |
| while (i < max_iteration & norm_r2 > norm_r2_target) { |
| q = t(A) %*% (A %*% p) |
| a = norm_r2 / sum (p*q) |
| x = x + a*p |
| r = r + a*q |
| old_norm_r2 = norm_r2 |
| norm_r2 = sum(r^2) |
| p = -r + (norm_r2/ old_norm_r2)*p |
| i = i + 1 |
| print ("Iteration " + i + ": ||r|| / ||r init|| = " + sqrt (norm_r2 / norm_r2_initial)) |
| log_str = append (log_str, "CG_RESIDUAL_NORM," + i + "," + sqrt (norm_r2)) |
| log_str = append (log_str, "CG_RESIDUAL_RATIO," + i + "," + sqrt (norm_r2 / norm_r2_initial)) |
| } |
| if (i >= max_iteration) { |
| print ("Warning: the maximum number of iterations has been reached.") |
| } |
| |
| print ("The CG algorithm is done.") |
| # END THE CONJUGATE GRADIENT ALGORITHM |
| |
| if (fileLog != " ") { |
| write (log_str, fileLog); |
| } else { |
| print("log_str:" + log_str) |
| } |
| } |
| |
| |
| # |
| # trunc the matrix by the specified amount in the specified direction. |
| # The shifted cells are discarded, the matrix is smaller in size |
| # |
| trunc = function ( |
| matrix[double] X, # nxm matrix |
| int by, # shift amount |
| String dir # currently only 'up' is supported. but should handle 'up', 'down', 'left', 'right' |
| ) return (matrix[double] Y) # Y is |
| { |
| r = nrow(X); c = ncol(X); |
| if (by > r) { |
| stop("can't pop matrix more than number of rows") |
| } |
| Y = matrix(0.0, r-by, c) |
| |
| if (r != by ) { |
| if (dir == "up") { |
| Y[1:r-by,] = X[1+by:r,] |
| } else if (dir == "down") { |
| Y[1:r-by,] = X[1:r-by,] |
| } else { |
| stop("trunc unsupported direction " + dir) |
| } |
| } |
| } |
| |
| # resize (only grow and not truncate) the matrix by the specified amount in the specified direction |
| resize = function( |
| matrix[double] X, #nxm matrix |
| int rby, # row resize count |
| int cby, # col resize count |
| String dir |
| ) return (matrix[double] Y) # Y is |
| { |
| r = nrow(X); c = ncol(X); |
| rn = r + rby; cn = c + cby; |
| Y = matrix(0.0, rn, cn) |
| if (dir == "tr") { # top right |
| Y[1+rby:rn, 1:c] = X |
| } else if (dir == "bl") { # bottom left |
| Y[1:r, 1+cby:cn] = X |
| } else if (dir == "tl") { # top left |
| Y[1+rby:rn, 1+cby:cn ] = X |
| } else if (dir == "br") { # bottom right |
| Y[1:r, 1:c] = X |
| } else { |
| stop("Unknown direction dir => " + dir) |
| } |
| } |
| |