| <!-- |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| --> |
| |
| <root> |
| <!-- local fs tmp working directory--> |
| <sysds.localtmpdir>/tmp/systemds</sysds.localtmpdir> |
| |
| <!-- hdfs tmp working directory--> |
| <sysds.scratch>scratch_space</sysds.scratch> |
| |
| <!-- compiler optimization level, valid values: 0 | 1 | 2 | 3 | 4, default: 2 --> |
| <sysds.optlevel>2</sysds.optlevel> |
| |
| <!-- default block dim for binary block files --> |
| <sysds.defaultblocksize>1000</sysds.defaultblocksize> |
| |
| <!-- enables multi-threaded operations in singlenode control program --> |
| <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops> |
| |
| <!-- enables multi-threaded read/write in singlenode control program --> |
| <sysds.cp.parallel.io>true</sysds.cp.parallel.io> |
| |
| <!-- enable multi-threaded transformencode and apply --> |
| <sysds.parallel.encode>true</sysds.parallel.encode> |
| |
| <!-- synchronization barrier between transformencode build and apply --> |
| <sysds.parallel.encode.staged>false</sysds.parallel.encode.staged> |
| |
| <!-- #parallel row blocks in multi-threaded transformencode build phase --> |
| <sysds.parallel.encode.buildBlocks>-1</sysds.parallel.encode.buildBlocks> |
| |
| <!-- #parallel row blocks in multi-threaded transformencode apply phase --> |
| <sysds.parallel.encode.applyBlocks>-1</sysds.parallel.encode.applyBlocks> |
| |
| <!-- #threads in multi-threaded transformencode --> |
| <sysds.parallel.encode.numThreads>-1</sysds.parallel.encode.numThreads> |
| |
| <!-- enable multi-threaded tokenize --> |
| <sysds.parallel.tokenize>false</sysds.parallel.tokenize> |
| |
| <!-- #blocks the input frame is split into for multi-threaded tokenization --> |
| <sysds.parallel.tokenize.numBlocks>64</sysds.parallel.tokenize.numBlocks> |
| |
| <!-- enables compressed linear algebra, experimental feature --> |
| <sysds.compressed.linalg>false</sysds.compressed.linalg> |
| |
| <!-- enables operator fusion via code generation, experimental feature --> |
| <sysds.codegen.enabled>false</sysds.codegen.enabled> |
| |
| <!-- set the codegen API (auto, java, cuda) --> |
| <sysds.codegen.api>auto</sysds.codegen.api> |
| |
| <!-- set the codegen compiler (auto, janino, javac, nvcc, nvrtc) --> |
| <sysds.codegen.compiler>auto</sysds.codegen.compiler> |
| |
| <!-- set the codegen optimizer (fuse_all, fuse_no_redundancy, fuse_cost_based_v2) --> |
| <sysds.codegen.optimizer>fuse_cost_based_v2</sysds.codegen.optimizer> |
| |
| <!-- if codegen.enabled, enables source code caching of fused operators --> |
| <sysds.codegen.plancache>true</sysds.codegen.plancache> |
| |
| <!-- if codegen.enabled, compile literals as constants: 1..heuristic, 2..always --> |
| <sysds.codegen.literals>1</sysds.codegen.literals> |
| |
| <!-- enables native blas for matrix multiplication and convolution, experimental feature (options: auto, mkl, openblas, none) --> |
| <sysds.native.blas>none</sysds.native.blas> |
| |
| <!-- custom directory where BLAS libraries are available, experimental feature (options: absolute directory path or none). If set to none, we use standard LD_LIBRARY_PATH. --> |
| <sysds.native.blas.directory>none</sysds.native.blas.directory> |
| |
| <!-- sets the GPUs to use per process: -1 for all GPUs, a specific GPU number (e.g., 5), a range (e.g., 0-2), or a comma-separated list (e.g., 0,2,4) --> |
| <sysds.gpu.availableGPUs>-1</sysds.gpu.availableGPUs> |
| |
| <!-- whether to synchronize GPUs after every GPU instruction --> |
| <sysds.gpu.sync.postProcess>false</sysds.gpu.sync.postProcess> |
| |
| <!-- whether to perform eager CUDA free on rmvar instruction --> |
| <sysds.gpu.eager.cudaFree>false</sysds.gpu.eager.cudaFree> |
| |
| <!-- Developer flag used to debug GPU memory leaks. This has a huge performance overhead and should only be turned on for debugging purposes. --> |
| <sysds.gpu.print.memoryInfo>false</sysds.gpu.print.memoryInfo> |
| |
| <!-- the floating point precision. supported values are double, single --> |
| <sysds.floating.point.precision>double</sysds.floating.point.precision> |
| |
| <!-- the eviction policy for the GPU bufferpool. Supported values are lru, mru, lfu, min_evict, align_memory --> |
| <sysds.gpu.eviction.policy>min_evict</sysds.gpu.eviction.policy> |
| |
| <!-- maximum wrap length for instruction and miscellaneous timer column of statistics --> |
| <sysds.stats.maxWrapLength>30</sysds.stats.maxWrapLength> |
| |
| <!-- Advanced optimization: fraction of driver memory to use for the GPU shadow buffer. This optimization is ignored for double precision. |
| By default, it is disabled (hence set to 0.0). If you intend to train a network larger than the GPU memory size, consider using single precision and setting this to 0.1. --> |
| <sysds.gpu.eviction.shadow.bufferSize>0.0</sysds.gpu.eviction.shadow.bufferSize> |
| |
| <!-- Fraction of available GPU memory to use. This is similar to TensorFlow's per_process_gpu_memory_fraction configuration property. (default: 0.9) --> |
| <sysds.gpu.memory.util.factor>0.9</sysds.gpu.memory.util.factor> |
| |
| <!-- Allocator to use to allocate GPU device memory. Supported values are cuda, unified_memory (default: cuda) --> |
| <sysds.gpu.memory.allocator>cuda</sysds.gpu.memory.allocator> |
| |
| <!-- whether rule-based operator placement for GPU is enabled --> |
| <sysds.gpu.place.rulebased>false</sysds.gpu.place.rulebased> |
| |
| <!-- enables disk spilling for lineage cache --> |
| <sysds.lineage.cachespill>true</sysds.lineage.cachespill> |
| |
| <!-- enables compiler assisted partial rewrites (e.g. Append-TSMM) --> |
| <sysds.lineage.compilerassisted>true</sysds.lineage.compilerassisted> |
| |
| <!-- set the federated plan generator (none, [runtime], compile_fed_all, compile_fed_heuristic) --> |
| <sysds.federated.planner>runtime</sysds.federated.planner> |
| |
| <!-- set the degree of parallelism of the federated worker event loop (<=0 means number of virtual cores) --> |
| <sysds.federated.par_conn>0</sysds.federated.par_conn> |
| |
| <!-- Set worker polling frequency for the monitoring backend in seconds --> |
| <sysds.federated.monitorFreq>3</sysds.federated.monitorFreq> |
| |
| <!-- set the degree of parallelism of the federated worker instructions (<=0 means number of virtual cores) --> |
| <sysds.federated.par_inst>0</sysds.federated.par_inst> |
| |
| <!-- enables the federated read cache for multi-tenancy / cross-session reuse --> |
| <sysds.federated.readcache>true</sysds.federated.readcache> |
| |
| <!-- sets the federated compression strategy (none, zlib, snappy, fastlz, lz4, lzf) --> |
| <sysds.federated.compression>none</sysds.federated.compression> |
| |
| <!-- set buffer pool threshold (max size) in % of total heap --> |
| <sysds.caching.bufferpoollimit>15</sysds.caching.bufferpoollimit> |
| |
| <!-- set memory manager (static, unified) --> |
| <sysds.caching.memorymanager>static</sysds.caching.memorymanager> |
| |
| <!-- Asynchronously trigger prefetch (Spark intermediate) --> |
| <sysds.async.prefetch>false</sysds.async.prefetch> |
| |
| <!-- Asynchronously trigger broadcast (CP intermediate) --> |
| <sysds.async.broadcast>false</sysds.async.broadcast> |
| |
| <!-- Compile-time synchronous/asynchronous checkpoint placement --> |
| <sysds.async.checkpoint>false</sysds.async.checkpoint> |
| |
| </root> |