| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # Utility script to generate random data for a matrix |
| # |
| # Parameters: |
| # R : (input) number of rows for the generated matrix |
| # C : (input) number of columns for the generated matrix |
| # S : (input) sparsity of the data |
| # Min : (input) minimum value of the data |
| # Max : (input) maximum value of the data |
| # Pdf : (input) probability distribution function for the data |
| # Path : (input) path on HDFS where the matrix will be stored |
| # |
| # Example: |
| # hadoop jar SystemML.jar -f <path/to/this/script>.dml -nvargs R=100 C=20 S=0.5 Min=0 Max=10 Pdf="uniform" Path="/tmp/randomData" |
| # |
| # Notes: |
| # All parameters are optional. Defaults: R=5, C=5, S=0.2, Min=0, Max=10, Pdf="uniform", |
| # Path="/user/bigr/randomData". |
| # The generated matrix is written in CSV format. |
| |
| # Read the named arguments (-nvargs); each one falls back to a default when omitted. |
| numRows = ifdef($R, 5) |
| numCols = ifdef($C, 5) |
| sparsityParam = ifdef($S, 0.2) |
| minVal = ifdef($Min, 0) |
| maxVal = ifdef($Max, 10) |
| pdFunc = ifdef($Pdf, "uniform") |
| pathUse = ifdef($Path, "/user/bigr/randomData") |
| |
| # Generate the matrix. The distribution is taken from the Pdf argument (pdFunc) instead of |
| # a hard-coded "uniform", so a caller-supplied Pdf is actually honored. |
| A = rand(rows=numRows, cols=numCols, sparsity=sparsityParam, min=minVal, max=maxVal, pdf=pdFunc); |
| |
| # Store the generated matrix at the requested path in CSV format. |
| write(A, pathUse, format="csv"); |