| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # Utility script to randomly shuffle the rows of an input dataset |
| # |
| # Parameters: |
| # x : (input) location of the data set to shuffle |
| # o : (output) location to write the shuffled data set |
| # ofmt : (default "csv") format of the output |
| # |
| # Example: |
| # hadoop jar SystemML.jar -f algorithms/utils/shuffle.dml -nvargs x="/tmp/M.mtx" o="/tmp/o.mtx" |
| # |
| |
| # Load the matrix to shuffle and resolve the output format (defaults to "csv"). |
| X = read($x) |
| out_format = ifdef($ofmt, "csv") |
| |
| n_cols = ncol(X) |
| |
| # Draw one uniform random value per row; it serves as a temporary sort key. |
| sort_key = rand(rows=nrow(X), cols=1, min=0, max=1, pdf="uniform") |
| |
| # Append the key as an extra last column and order the rows by it, |
| # which rearranges the rows according to the random keys. |
| X_keyed = cbind(X, sort_key) |
| X_ordered = order(target=X_keyed, by=n_cols + 1) |
| |
| # Strip the temporary key column so only the original columns remain. |
| X_shuffled = X_ordered[, 1:n_cols] |
| |
| write(X_shuffled, $o, format=out_format) |