| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.sysds.api; |
| |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import org.apache.commons.cli.CommandLine; |
| import org.apache.commons.cli.CommandLineParser; |
| import org.apache.commons.cli.Option; |
| import org.apache.commons.cli.OptionBuilder; |
| import org.apache.commons.cli.OptionGroup; |
| import org.apache.commons.cli.Options; |
| import org.apache.commons.cli.PosixParser; |
| import org.apache.sysds.common.Types.ExecMode; |
| import org.apache.sysds.hops.OptimizerUtils; |
| import org.apache.sysds.runtime.lineage.LineageCacheConfig.LineageCachePolicy; |
| import org.apache.sysds.runtime.lineage.LineageCacheConfig.ReuseCacheType; |
| import org.apache.sysds.utils.Explain; |
| import org.apache.sysds.utils.Explain.ExplainType; |
| |
| /** |
| * Set of DMLOptions that can be set through the command line |
| * and {@link org.apache.sysds.api.mlcontext.MLContext} |
| * The values have been initialized with the default values |
| * Despite there being a DML and PyDML, this class is named DMLOptions |
| * to keep it consistent with {@link DMLOptions} and {@link DMLOptions} |
| */ |
| public class DMLOptions { |
| public final Options options; |
| public Map<String, String> argVals = new HashMap<>(); // Arguments map containing either named arguments or arguments by position for a DML program |
| public String configFile = null; // Path to config file if default config and default config is to be overridden |
| public boolean clean = false; // Whether to clean up all SystemDS working directories (FS, DFS) |
| public boolean stats = false; // Whether to record and print the statistics |
| public int statsCount = 10; // Default statistics count |
| public boolean memStats = false; // max memory statistics |
| public Explain.ExplainType explainType = Explain.ExplainType.NONE; // Whether to print the "Explain" and if so, what type |
| public ExecMode execMode = OptimizerUtils.getDefaultExecutionMode(); // Execution mode standalone, MR, Spark or a hybrid |
| public boolean gpu = false; // Whether to use the GPU |
| public boolean forceGPU = false; // Whether to ignore memory & estimates and always use the GPU |
| public boolean debug = false; // to go into debug mode to be able to step through a program |
| public String filePath = null; // path to script |
| public String script = null; // the script itself |
| public boolean help = false; // whether to print the usage option |
| public boolean lineage = false; // whether compute lineage trace |
| public boolean lineage_dedup = false; // whether deduplicate lineage items |
| public ReuseCacheType linReuseType = ReuseCacheType.NONE; // reuse type (full, partial, hybrid) |
| public LineageCachePolicy linCachePolicy= LineageCachePolicy.HYBRID; // lineage cache eviction policy |
| public boolean fedWorker = false; |
| public int fedWorkerPort = -1; |
| public boolean checkPrivacy = false; // Check which privacy constraints are loaded and checked during federated execution |
| |
| public final static DMLOptions defaultOptions = new DMLOptions(null); |
| |
| public DMLOptions(Options opts) { |
| options = opts; |
| } |
| |
| @Override |
| public String toString() { |
| return "DMLOptions{" + |
| "argVals=" + argVals + |
| ", configFile='" + configFile + '\'' + |
| ", clean=" + clean + |
| ", stats=" + stats + |
| ", statsCount=" + statsCount + |
| ", memStats=" + memStats + |
| ", explainType=" + explainType + |
| ", execMode=" + execMode + |
| ", gpu=" + gpu + |
| ", forceGPU=" + forceGPU + |
| ", debug=" + debug + |
| ", filePath='" + filePath + '\'' + |
| ", script='" + script + '\'' + |
| ", help=" + help + |
| ", lineage=" + lineage + |
| ", w=" + fedWorker + |
| '}'; |
| } |
| |
| /** |
| * Parses command line arguments to create a {@link DMLOptions} instance with the correct options |
| * @param args arguments from the command line |
| * @return an instance of {@link DMLOptions} that contain the correct {@link Option}s. |
| * @throws org.apache.commons.cli.ParseException if there is an incorrect option specified in the CLI |
| */ |
| public static DMLOptions parseCLArguments(String[] args) |
| throws org.apache.commons.cli.ParseException |
| { |
| Options options = createCLIOptions(); |
| CommandLineParser clParser = new PosixParser(); |
| CommandLine line = clParser.parse(options, args); |
| |
| DMLOptions dmlOptions = new DMLOptions(options); |
| dmlOptions.help = line.hasOption("help"); |
| if (line.hasOption("lineage")){ |
| dmlOptions.lineage = true; |
| String lineageTypes[] = line.getOptionValues("lineage"); |
| if (lineageTypes != null) { |
| for (String lineageType : lineageTypes) { |
| if (lineageType != null){ |
| if (lineageType.equalsIgnoreCase("dedup")) |
| dmlOptions.lineage_dedup = lineageType.equalsIgnoreCase("dedup"); |
| else if (lineageType.equalsIgnoreCase("reuse_full") |
| || lineageType.equalsIgnoreCase("reuse")) |
| dmlOptions.linReuseType = ReuseCacheType.REUSE_FULL; |
| else if (lineageType.equalsIgnoreCase("reuse_partial")) |
| dmlOptions.linReuseType = ReuseCacheType.REUSE_PARTIAL; |
| else if (lineageType.equalsIgnoreCase("reuse_multilevel")) |
| dmlOptions.linReuseType = ReuseCacheType.REUSE_MULTILEVEL; |
| else if (lineageType.equalsIgnoreCase("reuse_hybrid")) |
| dmlOptions.linReuseType = ReuseCacheType.REUSE_HYBRID; |
| else if (lineageType.equalsIgnoreCase("none")) |
| dmlOptions.linReuseType = ReuseCacheType.NONE; |
| else if (lineageType.equalsIgnoreCase("policy_lru")) |
| dmlOptions.linCachePolicy = LineageCachePolicy.LRU; |
| else if (lineageType.equalsIgnoreCase("policy_costnsize")) |
| dmlOptions.linCachePolicy = LineageCachePolicy.COSTNSIZE; |
| else if (lineageType.equalsIgnoreCase("policy_hybrid")) |
| dmlOptions.linCachePolicy = LineageCachePolicy.HYBRID; |
| else |
| throw new org.apache.commons.cli.ParseException( |
| "Invalid argument specified for -lineage option: " + lineageType); |
| } |
| } |
| } |
| } |
| dmlOptions.debug = line.hasOption("debug"); |
| dmlOptions.gpu = line.hasOption("gpu"); |
| if (dmlOptions.gpu) { |
| String force = line.getOptionValue("gpu"); |
| if (force != null) { |
| if (force.equalsIgnoreCase("force")) { |
| dmlOptions.forceGPU = true; |
| } else { |
| throw new org.apache.commons.cli.ParseException("Invalid argument specified for -gpu option"); |
| } |
| } |
| } |
| if (line.hasOption("exec")){ |
| String execMode = line.getOptionValue("exec"); |
| if (execMode != null){ |
| if (execMode.equalsIgnoreCase("singlenode")) dmlOptions.execMode = ExecMode.SINGLE_NODE; |
| else if (execMode.equalsIgnoreCase("hybrid")) dmlOptions.execMode = ExecMode.HYBRID; |
| else if (execMode.equalsIgnoreCase("spark")) dmlOptions.execMode = ExecMode.SPARK; |
| else throw new org.apache.commons.cli.ParseException("Invalid argument specified for -exec option, must be one of [hadoop, singlenode, hybrid, HYBRID, spark]"); |
| } |
| } |
| if (line.hasOption("explain")) { |
| dmlOptions.explainType = ExplainType.RUNTIME; |
| String explainType = line.getOptionValue("explain"); |
| if (explainType != null){ |
| if (explainType.equalsIgnoreCase("hops")) dmlOptions.explainType = ExplainType.HOPS; |
| else if (explainType.equalsIgnoreCase("runtime")) dmlOptions.explainType = ExplainType.RUNTIME; |
| else if (explainType.equalsIgnoreCase("recompile_hops")) dmlOptions.explainType = ExplainType.RECOMPILE_HOPS; |
| else if (explainType.equalsIgnoreCase("recompile_runtime")) dmlOptions.explainType = ExplainType.RECOMPILE_RUNTIME; |
| else throw new org.apache.commons.cli.ParseException("Invalid argument specified for -hops option, must be one of [hops, runtime, recompile_hops, recompile_runtime]"); |
| } |
| } |
| dmlOptions.stats = line.hasOption("stats"); |
| if (dmlOptions.stats){ |
| String statsCount = line.getOptionValue("stats"); |
| if (statsCount != null) { |
| try { |
| dmlOptions.statsCount = Integer.parseInt(statsCount); |
| } catch (NumberFormatException e) { |
| throw new org.apache.commons.cli.ParseException("Invalid argument specified for -stats option, must be a valid integer"); |
| } |
| } |
| } |
| dmlOptions.memStats = line.hasOption("mem"); |
| |
| dmlOptions.clean = line.hasOption("clean"); |
| |
| if (line.hasOption("config")){ |
| dmlOptions.configFile = line.getOptionValue("config"); |
| } |
| |
| if (line.hasOption("w")){ |
| dmlOptions.fedWorker = true; |
| dmlOptions.fedWorkerPort = Integer.parseInt(line.getOptionValue("w")); |
| } |
| |
| if (line.hasOption("f")){ |
| dmlOptions.filePath = line.getOptionValue("f"); |
| } |
| |
| if (line.hasOption("s")){ |
| dmlOptions.script = line.getOptionValue("s"); |
| } |
| |
| // Positional arguments map is created as ("$1", "a"), ("$2", 123), etc |
| if (line.hasOption("args")){ |
| String[] argValues = line.getOptionValues("args"); |
| for (int k=0; k<argValues.length; k++){ |
| String str = argValues[k]; |
| if (!str.isEmpty()) { |
| dmlOptions.argVals.put("$" + (k+1), str); |
| } |
| } |
| } |
| |
| // Named arguments map is created as ("$K, 123), ("$X", "X.csv"), etc |
| if (line.hasOption("nvargs")){ |
| String varNameRegex = "^[a-zA-Z]([a-zA-Z0-9_])*$"; |
| String[] nvargValues = line.getOptionValues("nvargs"); |
| for (String str : nvargValues){ |
| if (!str.isEmpty()){ |
| String[] kv = str.split("="); |
| if (kv.length != 2){ |
| throw new org.apache.commons.cli.ParseException("Invalid argument specified for -nvargs option, must be a list of space separated K=V pairs, where K is a valid name of a variable in the DML/PyDML program"); |
| } |
| if (!kv[0].matches(varNameRegex)) { |
| throw new org.apache.commons.cli.ParseException("Invalid argument specified for -nvargs option, " + kv[0] + " does not seem like a valid variable name in DML. Valid variable names in DML start with upper-case or lower-case letter, and contain only letters, digits, or underscores"); |
| } |
| dmlOptions.argVals.put("$" + kv[0], kv[1]); |
| } |
| } |
| } |
| |
| dmlOptions.checkPrivacy = line.hasOption("checkPrivacy"); |
| |
| return dmlOptions; |
| } |
| |
| @SuppressWarnings("static-access") |
| private static Options createCLIOptions() { |
| Options options = new Options(); |
| Option nvargsOpt = OptionBuilder.withArgName("key=value") |
| .withDescription("parameterizes DML script with named parameters of the form <key=value>; <key> should be a valid identifier in DML/PyDML") |
| .hasArgs().create("nvargs"); |
| Option argsOpt = OptionBuilder.withArgName("argN") |
| .withDescription("specifies positional parameters; first value will replace $1 in DML program; $2 will replace 2nd and so on") |
| .hasArgs().create("args"); |
| Option configOpt = OptionBuilder.withArgName("filename") |
| .withDescription("uses a given configuration file (can be on local/hdfs/gpfs; default values in SystemDS-config.xml") |
| .hasArg().create("config"); |
| Option cleanOpt = OptionBuilder.withDescription("cleans up all SystemDS working directories (FS, DFS); all other flags are ignored in this mode.") |
| .create("clean"); |
| Option statsOpt = OptionBuilder.withArgName("count") |
| .withDescription("monitors and reports summary execution statistics; heavy hitter <count> is 10 unless overridden; default off") |
| .hasOptionalArg().create("stats"); |
| Option memOpt = OptionBuilder.withDescription("monitors and reports max memory consumption in CP; default off") |
| .create("mem"); |
| Option explainOpt = OptionBuilder.withArgName("level") |
| .withDescription("explains plan levels; can be 'hops' / 'runtime'[default] / 'recompile_hops' / 'recompile_runtime'") |
| .hasOptionalArg().create("explain"); |
| Option execOpt = OptionBuilder.withArgName("mode") |
| .withDescription("sets execution mode; can be 'hadoop' / 'singlenode' / 'hybrid'[default] / 'HYBRID' / 'spark'") |
| .hasArg().create("exec"); |
| Option gpuOpt = OptionBuilder.withArgName("force") |
| .withDescription("uses CUDA instructions when reasonable; set <force> option to skip conservative memory estimates and use GPU wherever possible; default off") |
| .hasOptionalArg().create("gpu"); |
| Option debugOpt = OptionBuilder.withDescription("runs in debug mode; default off") |
| .create("debug"); |
| Option pythonOpt = OptionBuilder.withDescription("parses Python-like DML") |
| .create("python"); |
| Option fileOpt = OptionBuilder.withArgName("filename") |
| .withDescription("specifies dml/pydml file to execute; path can be local/hdfs/gpfs (prefixed with appropriate URI)") |
| .isRequired().hasArg().create("f"); |
| Option scriptOpt = OptionBuilder.withArgName("script_contents") |
| .withDescription("specified script string to execute directly") |
| .isRequired().hasArg().create("s"); |
| Option helpOpt = OptionBuilder.withDescription("shows usage message") |
| .create("help"); |
| Option lineageOpt = OptionBuilder.withDescription("computes lineage traces") |
| .hasOptionalArgs().create("lineage"); |
| Option fedOpt = OptionBuilder.withDescription("starts a federated worker with the given argument as the port.") |
| .hasOptionalArg().create("w"); |
| Option checkPrivacy = OptionBuilder |
| .withDescription("Check which privacy constraints are loaded and checked during federated execution") |
| .create("checkPrivacy"); |
| |
| options.addOption(configOpt); |
| options.addOption(cleanOpt); |
| options.addOption(statsOpt); |
| options.addOption(memOpt); |
| options.addOption(explainOpt); |
| options.addOption(execOpt); |
| options.addOption(gpuOpt); |
| options.addOption(debugOpt); |
| options.addOption(pythonOpt); |
| options.addOption(lineageOpt); |
| options.addOption(fedOpt); |
| options.addOption(checkPrivacy); |
| |
| // Either a clean(-clean), a file(-f), a script(-s) or help(-help) needs to be specified |
| OptionGroup fileOrScriptOpt = new OptionGroup() |
| .addOption(scriptOpt) |
| .addOption(fileOpt) |
| .addOption(cleanOpt) |
| .addOption(helpOpt) |
| .addOption(fedOpt); |
| fileOrScriptOpt.setRequired(true); |
| options.addOptionGroup(fileOrScriptOpt); |
| |
| // Either -args or -nvargs |
| options.addOptionGroup(new OptionGroup() |
| .addOption(nvargsOpt).addOption(argsOpt)); |
| options.addOption(helpOpt); |
| |
| return options; |
| } |
| } |