| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # Test driver that imputes a data frame with mice() and writes the result. |
| # Script arguments used below: |
| #   $X         - input data, read as a csv frame |
| #   $Mask      - mask marking categorical columns (non-zero = categorical) |
| #   $iteration - forwarded to mice() as iter |
| #   $dataC     - output location for the categorical result columns |
| #   $dataN     - output location for the numeric result columns |
| |
| # read data frame |
| F = read($X, data_type="frame", format="csv"); |
| # the mask for identifying categorical columns |
| # NOTE(review): used as a row vector (ncol(Mask), t(Mask)); presumably 1 x ncol(F) - confirm |
| Mask = read($Mask) |
| |
| # Test cases |
| # case 1: if all columns are categorical |
| if(sum(Mask) == ncol(F)) |
| { |
|   # build the comma-separated list "1,2,...,ncol(F)" of column ids to recode |
|   s = "1"; |
|   # guard the loop: with a single column the range 2:1 must not be iterated, |
|   # otherwise non-existent column ids would be appended to the spec |
|   if(ncol(F) > 1) |
|   { |
|     for(i in 2:ncol(F)) |
|       s = s + "," + i; |
|   } |
|   # encoding categorical columns using recode transformation |
|   jspecR = "{ids:true, recode:["+s+"]}"; |
|   [X, M] = transformencode(target=F, spec=jspecR); |
|   # call mice |
|   dataset = mice(X=X,cMask=Mask, iter=$iteration, verbose = FALSE ) |
|   # decode data back to original format |
|   output = as.matrix(transformdecode(target=dataset, spec=jspecR, meta=M)); |
|   # cherry picking columns to compare with R results |
|   # (columns 3 and 4 are hard-coded for the test data set) |
|   output = output[, 3:4] |
|   write(output, $dataC) |
| } |
| # case 2: if all data is numeric |
| else if(sum(Mask) == 0){ |
|   # no transformation is required, cast the frame into matrix and call mice |
|   # as.matrix() will convert the null values into zeros, so explicitly replace zeros with NaN |
|   # NOTE(review): this also turns genuine zero values into NaN; the test data |
|   # presumably contains no real zeros - confirm |
|   X = replace(target = as.matrix(F), pattern = 0, replacement = NaN) |
|   output = mice(X=X, cMask=Mask, iter=$iteration, verbose = FALSE ) |
|   write(output, $dataN) |
| } |
| # case 3: if the data is combination of numeric and categorical columns |
| else |
| { |
|   # cat holds the ids of the categorical columns: the positions 1..ncol(Mask) |
|   # filtered down to those where Mask is non-zero |
|   scat = seq(1, ncol(Mask)) |
|   cat = removeEmpty(target=scat, margin="rows", select=t(Mask)) |
|   # build the comma-separated list of categorical column ids |
|   s = "" + as.integer(as.scalar(cat[1, 1])) |
|   # guard the loop: with exactly one categorical column the range 2:1 must not |
|   # be iterated, otherwise cat[2, 1] would be indexed out of bounds |
|   if(nrow(cat) > 1) |
|   { |
|     for(i in 2:nrow(cat)) |
|       s = s + "," + as.integer(as.scalar(cat[i, 1])); |
|   } |
| |
|   # encoding categorical columns using recode transformation |
|   jspecR = "{ids:true, recode:["+s+"]}"; |
|   [X, M] = transformencode(target=F, spec=jspecR); |
|   # call mice |
|   dataset = mice(X=X,cMask=Mask, iter=$iteration, verbose = FALSE ) |
|   # decode data into original format |
|   output = as.matrix(transformdecode(target=dataset, spec=jspecR, meta=M)); |
|   # below lines are only for testing purpose: |
|   # zero out the columns of the other kind via the mask, then drop the |
|   # resulting all-zero columns to split categorical (c) and numeric (n) results |
|   c = output * (Mask) |
|   c = removeEmpty(target=c, margin = "cols") |
|   n = output * (1-Mask) |
|   n = removeEmpty(target=n, margin = "cols") |
|   write(n, $dataN) |
|   write(c, $dataC) |
| } |