blob: ee854a23faa035e02b6262bcbd64f5c96248a99d [file] [log] [blame]
# -----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# -----------------------------------------------------------------------
# WARNING: DO NOT EDIT THIS FILE.
# All customizations must be created in a file "site.ducc.properties" that is in
# your ducc_runtime/resources. DUCC's startup procedures will merge this file and
# your site.ducc.properties into ducc.properties which is what DUCC will use.
# WARNING DO NOT EDIT THIS FILE.
# ====================================================================================
# This configuration file contains most of the specifications for DUCC.
#
# Two other important customizable files define the classes and nodes.
#
# The class definition file is specified by the property 'ducc.rm.class.definitions'
# and a sample called ducc.classes has been provided in DUCC_HOME/resources.
#
# Node definitions are by default taken from ducc.nodes but this may be overridden
# with the -n option on start_ducc.
#
# The first two entries here are customized in site.ducc.properties by ducc_post_install.
#
# ====================================================================================
# +==================================================================================================+
# | General |
# +==================================================================================================+
# The name of the node where DUCC runs.
# This property declares the node where the DUCC administrative processes run (Orchestrator,
# Resource Manager, Process Manager, Service Manager). This property is required and MUST be
# configured in new installation. The installation script ducc_post_install initializes this
# property to the node the script is executed on.
# Reliable DUCC: if running reliably, then this value must resolve to the same ip address
# specified for the virtual_ipaddress in /etc/keepalived/keepalived.conf for master and
# backup nodes. To avoid nameserver glitches, consider specifying the ip address, not the
# host name. DUCC CLI and Agents employ this value to connect to the current reliable
# DUCC head node.
ducc.head = <head-node>
# Reliable DUCC: if running reliably, then this value must comprise the blank delimited list
# of nodes that are eligible to become the DUCC head node. Admin commands start_ducc and
# stop_ducc are only allowed on the ducc.head node or any node in the ducc.head.reliable.list.
# An empty ducc.head.reliable.list indicates that DUCC is not running reliably.
ducc.head.reliable.list =
# The full name of the Java command.
# This specifies the full path to the JVM to be used by the DUCC processes. This MUST be
# configured. The installation script ducc_post_install initializes this property to
# the full path to java in the installer's environment. (If the java command cannot be found,
# ducc_post_install exits with error.)
ducc.jvm = <full-path-to-java-command>
# The name of the cluster as shown by the Web Server.
# This is a string used in the Web Server banner to identify the local cluster. It is used
# for informational purposes only and may be set to anything desired.
ducc.cluster.name=Apache UIMA-DUCC
# Specify location of private resources directory. UIMA-3892
ducc.private.resources = ${DUCC_HOME}/resources.private
# Location of security home directory. When non-empty the userid is appended and it replaces the
# default of <user.home> as the location of the keys used in request validation.
ducc.security.home =
# Name any site-local jars.
# This may be used to list site-specific jars that are required by local
# customizations, e.g. authentication. The value must be a blank-delimited list of jars
# relative to the DUCC_HOME/lib directory.
#ducc.local.jars =
# Declare the type of middleware providing the JMS service used by DUCC.
ducc.jms.provider=activemq
# +==================================================================================================+
# | Broker |
# +==================================================================================================+
#Declare the wire protocol used to communicate with ActiveMQ.
ducc.broker.protocol=tcp
# This declares the port on which the ActiveMQ broker is listening for messages.
ducc.broker.port=61617
# The broker *client* url decoration (ie - DUCC daemons).
ducc.broker.url.decoration=jms.useCompression=true&jms.prefetchPolicy.all=0
# The Broker's name must match the actual broker name in the broker config.
# This is the internal name of the broker, used to locate Broker's MBean in
# JMX Registry. It is NOT related to any node name. When using the ActiveMQ
# distribution supplied with DUCC it should always be set to localhost.
ducc.broker.name=localhost
# The Broker's jmx port.
# This is the port used to make JMX connections to the broker. This should only
# be changed by administrators familiar with ActiveMQ configuration.
ducc.broker.jmx.port=1100
# If set to true, DUCC will start and stop the ActiveMQ broker as part of its normal
# start/stop scripting.
ducc.broker.automanage = true
# This is the ActiveMQ credentials file used to authenticate DUCC daemons with the
# broker.
ducc.broker.credentials.file=${ducc.private.resources}/ducc-broker-credentials.properties
# This sets the heap size for the broker.
ducc.broker.memory.options = -Xmx1G
# This is the ActiveMQ configuration file to use. The path
# must be specified relative to the ActiveMQ installation directory.
ducc.broker.configuration = conf/activemq-ducc.xml
# This names the location where ActiveMQ is installed.
ducc.broker.home = ${DUCC_HOME}/apache-uima/apache-activemq
# The broker "server" URL decoration,
# = 45 second timeout on write operations to the socket.
ducc.broker.server.url.decoration = transport.soWriteTimeout=45000
# +==================================================================================================+
# | Transport |
# +==================================================================================================+
# To enable tracing of RM messages arriving in OR and NodeMetrics arriving in WS.
#ducc.transport.trace = orchestrator:RmStateDuccEvent webserver:NodeMetricsUpdateDuccEvent
# +==================================================================================================+
# | Authentication |
# +==================================================================================================+
# This specifies the class used for Web Server session authentication.
# If unconfigured, the Web Server enforces no authentication.
#ducc.authentication.implementer=org.apache.uima.ducc.ws.authentication.LinuxAuthenticationManager
#ducc.authentication.implementer=org.apache.uima.ducc.ws.authentication.SecureFileAuthenticator
#ducc.authentication.implementer=org.apache.uima.ducc.ws.authentication.GSAAuthenticator
# Specify users allowed to log in to the web server.
#ducc.authentication.users.include = user1 user2
# Specify users not allowed to login (default is all users can login; if user is in
# both include and exclude lists, then exclude is enforced)
#ducc.authentication.users.exclude = user1 user3
# Specify groups allowed to login (default is all groups can login)
#ducc.authentication.groups.include = groupA groupB
# Specify groups not allowed to login (default is all groups can login; if group is in
# both include and exclude lists, then exclude is enforced)
#ducc.authentication.groups.exclude = groupA groupC
# +==================================================================================================+
# | Language |
# +==================================================================================================+
# Establish the language for national language support of messages.
# Currently only "en" is supported.
ducc.locale.language=en
# Establish the country for National Language Support of messages.
# Currently only "us" is supported.
ducc.locale.country=us
# +==================================================================================================+
# | Daemon Administration |
# +==================================================================================================+
# This is the JMS endpoint name used for DUCC administration messages.
ducc.admin.endpoint=ducc.admin.channel
# This is the JMS message type used for DUCC administration messages.
# Only "topic" is supported.
ducc.admin.endpoint.type=topic
# JMX port number for Ducc process. Each DUCC process will attempt
# to use this port for its JMX Connector. If the port is not available
# port+1 will be used until an available port is found.
# Every process started by DUCC has JMX enabled by default.
# The DUCC Web Server's System.Daemons page is used to find the JMX URL that gets assigned to
# each of the DUCC management processes. The Web Server's Job.Details page for each job is
# used to find the JMX URL that is assigned to each JP.
ducc.jmx.port=2099
ducc.agent.jvm.args = -Xmx500M
ducc.orchestrator.jvm.args = -Xmx1G
ducc.rm.jvm.args = -Xmx1G
ducc.pm.jvm.args = -Xmx1G
ducc.sm.jvm.args = -Xmx1G
# use the following flag under IBM Java 8 to allow Chromium to visit
# w/o getting ERR_SSL_VERSION_OR_CIPHER_MISMATCH
# -Dcom.ibm.jsse2.overrideDefaultTLS=true
# see https://issues.apache.org/jira/browse/UIMA-5475
ducc.ws.jvm.args = -Xmx2G -Djava.util.Arrays.useLegacyMergeSort=true -Dcom.ibm.jsse2.overrideDefaultTLS=true
# +==================================================================================================+
# | Node Administration |
# +==================================================================================================+
# Specify a minimum amount of free swap space available on a node.
# If an agent detects free swap space dipping below the value defined
# below, it will find the fattest (in terms of memory) process in its
# inventory and kill it. The value of the parameter below is expressed
# in bytes.
# Initially disabled by setting the threshold at 0.
ducc.node.min.swap.threshold=0
# +==================================================================================================+
# | Job Administration |
# +==================================================================================================+
# Max number of work-item CASes for each job (default is "unlimited")
# Note: formerly known as ducc.threads.limit
# This enforces a maximum number of pipelines per job, over all its processes. No
# job will have more active work-items than this dispatched.
# The value is related to the size of the Job Driver heap and the real memory consumed by JD.
# If the JD is consuming too much memory, try reducing this value.
ducc.job.max.pipelines.count = 5000
# +==================================================================================================+
# | CLI Administration |
# +==================================================================================================+
# These environment values are included on job/service/AP submissions
ducc.environment.propagated = USER HOME LANG DUCC_SERVICE_INSTANCE
# No timeout on CLI requests
ducc.cli.httpclient.sotimeout=0
#------------------------------------------------------------------------------
# When set, the CLI signs each request so the Orchestrator can be sure the
# requestor is actually who he claims to be.
# off, // CLI submit and cancel signature enforcement disabled
# on, // CLI submit and cancel signature enforcement enabled (default)
ducc.signature.required=on
#------------------------------------------------------------------------------
# +==================================================================================================+
# | Web Server |
# +==================================================================================================+
# The name of the pluggable java class used to implement the Web Server.
ducc.ws.configuration.class=org.apache.uima.ducc.ws.config.WebServerConfiguration
# This endpoint is used for Web Server self test to determine viability of broker.
# After 3 missed messages to self via broker, Web Server considers broker to be down.
ducc.ws.state.update.endpoint=ducc.ws.state
# This is the JMS endpoint type used for the state messages sent by the Web Server.
ducc.ws.state.update.endpoint.type=topic
# The interval in milliseconds between Web Server publications of its state.
ducc.ws.state.publish.rate=5000
# The elapsed time in milliseconds between monitored head-node daemons' publications
# that if exceeded indicates "down". Default = 120000 (two minutes).
ducc.ws.monitored.daemon.down.millis.expiry=120000
# Optionally configure the Web Server to run on a non-head node
# This is the name of the node the web server is started on. If not specified,
# the web server is started on ${ducc.head}.
# ducc.ws.node = my.node.com
# Optionally configure the Web Server IP address
# In multi-homed systems it may be necessary to specify to which of the multiple addresses
# the Web Server listens for requests. This property is an IP address that specifies to which
# address the Web Server listens.
# ducc.ws.ipaddress = <fill in an IP address>
# Optionally configure the Web Server IP port for HTTP requests, default is 42133
ducc.ws.port = 42133
# Optionally configure the Web Server IP port for HTTPS requests, default is 42155
ducc.ws.port.ssl = 42155
# Optionally configure the Web Server welcome page, default is index.html (which forwards to jobs.html)
ducc.ws.welcome.page = index.html
# Optionally configure the Web Server DUCC_HOME display value, default is absolute path of DUCC_HOME.
ducc.ws.display.home =
# Optionally configure the Web Server job automatic cancel timeout, default is 10. To disable
# feature specify 0. Employed when user specifies --wait_for_completion flag on job submission,
# in which case the job monitor program must visit
# http://<host>:<port>/ducc-servlet/proxy-job-status?id=<job-id> within this expiry time
# else the job will be automatically canceled (unless feature disabled) by the Web Server
# acting as the administrator ducc (which must be specified in the ducc.administrators file).
ducc.ws.automatic.cancel.minutes = 5
# Optionally configure the Web Server max cached (and thus available for display)
# history entries for each of Jobs/Reservations/Services
ducc.ws.max.history.entries = 4096
# Specify login enabled (default is true)
ducc.ws.login.enabled = false
# For node visualization - if true, strip domain names from labels for cleaner visuals
ducc.ws.visualization.strip.domain = true
# Optionally configure the Web Server request log, default is 0 (meaning no request logging)
# Logs are written to DUCC_HOME/logs/webserver
ducc.ws.requestLog.RetainDays = 30
# Specify one of { unrestricted, encrypted, blocked } to control
# requests to the Web Server with responses containing user data.
# When "unrestricted" requests for user data via http or https are honored.
# When "encrypted" requests for user data only via https are honored.
# When "blocked" requests for user data are not honored.
ducc.ws.user.data.access = unrestricted
# Note: to employ "encrypted" use the following settings:
# ducc.ws.port = 42133
# ducc.ws.port.ssl = 42155
# ducc.ws.login.enabled = true
# ducc.ws.user.data.access = encrypted
# See documentation for further information.
# --------------------------------------------------------------
# name: ducc.ws.banner.message
# purpose: display banner message on all main pages
# choices: default=none
# change: effective immediately
#ducc.ws.banner.message = Do not adjust your set. This is a message from your DUCC administrator.
# NOTE - Feature under development
# When set exposes the Experiments page and enables the ducc_jed_submit script
ducc.experiments = false
# +==================================================================================================+
# | Job Driver |
# +==================================================================================================+
# The name of the pluggable java class used to implement the Job Driver (JD).
ducc.jd.configuration.class=org.apache.uima.ducc.transport.configuration.jd.JobDriverConfiguration
# This is the JMS endpoint name used by the Job Driver to send state to the Orchestrator.
ducc.jd.state.update.endpoint=ducc.jd.state
# This is the JMS message type used to send state to the Orchestrator.
ducc.jd.state.update.endpoint.type=topic
# The interval in milliseconds between JD state publications to the Orchestrator.
# A higher rate (smaller number) may slightly increase system response but will
# increase network load. A lower rate will somewhat decrease system response and
# lower network load.
ducc.jd.state.publish.rate=15000
# This is a human-readable string used to form queue names for the JMS queues used to pass
# CASs from the Job Driver to the Job Processes. The completed queue name comprises the prefix
# concatenated with the DUCC assigned Job number.
ducc.jd.queue.prefix=ducc.jd.queue.
# After dispatching a work item to UIMA-AS client for processing, the number of minutes that the
# Job Driver will wait for two callbacks (queued and assigned) before considering the work item
# lost. The elapsed time for the callbacks is normally sub-second. Intermittent network problems
# may cause unusual spikes. If not specified, default value is 5 minutes.
ducc.jd.queue.timeout.minutes=5
# If not specified, default value is 24 hrs (24*60 minutes)
# This property specifies the default value for the time, in minutes, that the JD should
# wait for a work-item to be processed. If processing has not completed in this time the
# process is terminated and the job's error handler decides whether to retry the
# work-item or not.
ducc.default.process.per.item.time.max = 1440
# If not specified, default max time in minutes allowed for AE initialization.
# This property specifies the default value for the time, in minutes, that the agent should
# wait for a JP to complete initialization. If initialization is not completed in this time
# the process is terminated and an InitializationTimeout status is sent to the JD
# which decides whether to retry the process or terminate the job.
ducc.default.process.init.time.max = 120
# The following 5 values comprise the specification used by the DUCC Orchestrator daemon to
# request an allocation from the DUCC Resource Manager for Job Driver use. The values given
# below are the defaults.
ducc.jd.host.class=JobDriver
ducc.jd.host.description=Job Driver
ducc.jd.host.memory.size=2GB
ducc.jd.host.number.of.machines=1
ducc.jd.host.user=System
# For a newly started Job, the number of JP UIMA initialization failures
# allowed until at least one JP succeeds - otherwise, the Job self-destructs.
# Default is 1.
ducc.jd.startup.initialization.error.limit=1
# The next 4 values are related - each JD is assigned a piece of the Job Driver host memory
# which, along with the size of the CR's type system, limits the number of active work-item
# CASes in a job. To avoid swapping the max heap size should also be restricted.
# Memory size in MB allocated for each JD (default 300)
# When CGroups are enabled, this is the RSS, in MB, that is reserved for each JD process,
# and enforced by the CGroup support. Larger JDs are permitted, but the CGroup support will
# force the excess RSS onto swap. This potentially slows the performance of that JD, but
# preserves the resources for other, better-behaved, JDs.
ducc.jd.share.quantum = 400
# The number of "slices" of size "jd.share.quantum" kept in reserve.
# The Orchestrator makes Reservation requests to RM to get Reservations
# (Job Driver hosts) each of which is then subdivided into "slices", one
# per JD. This number specifies the number of unused "slices" that should
# be kept on-hand in anticipation of newly submitted jobs (default 2).
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# If the value specified is 0 then no JD allocation will take place
# and all submitted jobs will be rejected.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
ducc.jd.share.quantum.reserve.count = 3
# The maximum length of a work-item name returned by CAS.getDocumentText().
# Truncation is enforced beyond this limit.
# If not specified, there is no limit.
ducc.jd.workitem.name.maximum.length = 64
# Extra JVM args to be appended to any user-supplied "driver_jvm_args"
# Dynamic: changes to this property immediately affect new submitted work
# Flag: -DUimaAsCasTracking will add to logs:
# UIMA-AS client & service trigger internal tracing including RefId,
# UIMA-AS client onBeforeMessageSendHandler callbacks including RefId, and
# UIMA-AS client onBeforeProcessCAS callbacks including RefId
# Flag: -DUimaAsClientTracking will add to jd.out.log:
# core pool size changes,
# UIMA-AS client sendAndReceive invocations,
# UIMA-AS client onBeforeMessageSendHandler callbacks, and
# UIMA-AS client onBeforeProcessCAS callbacks
# Note: should contain an Xmx a little below the "ducc.jd.share.quantum" value
# the Xmx entry is ignored if the user has specified one
ducc.driver.jvm.args = -Xmx300M
# Max number of threads in Jetty thread pool servicing incoming
# HTTP requests
# ducc.driver.jetty.max.threads = 100
# Max idle time for jetty threads (in millis). When a thread exceeds
# its idle time it will be terminated.
# ducc.driver.jetty.thread.idletime = 60000
# Extra JVM args to be appended to any user-supplied "process_jvm_args"
# Dynamic: changes to this property immediately affect new submitted work
# ducc.process.jvm.args=-XX:+HeapDumpOnOutOfMemoryError
# --------------------------------------------------------------
# name: ducc.jd.communications.scheme
# purpose: specify communications scheme between JD and JPs
# choices: [http,https] default=https
# change: effective immediately for new jobs
ducc.jd.communications.scheme=https
# --------------------------------------------------------------
# name: ducc.jd.error.handler.class
# purpose: specify error handler
# default: org.apache.uima.ducc.ErrorHandler
# change: effective immediately for new jobs
# details: The error handler class is employed when work items fail or time-out in order
# to determine what to do next in three dimensions:
# job kill or continue, process kill or continue, work item kill or continue.
ducc.jd.error.handler.class = org.apache.uima.ducc.ErrorHandler
# --------------------------------------------------------------
# name: ducc.jd.error.handler.args
# purpose: configure error handler
# choices: max_job_errors=<e> max_timeout_retrys_per_workitem=<r>
# change: effective immediately for new jobs
# details: The error handler args passed to the error handler class (see above).
# These values, if any, are combined with any specified by the user.
# Individual user specified --driver_exception_handler_arguments <args>
# prevail.
ducc.jd.error.handler.args = max_job_errors=15 max_timeout_retrys_per_workitem=0
# +==================================================================================================+
# | Service Manager |
# +==================================================================================================+
# This is the name of the pluggable java class used to implement the Service Manager.
ducc.sm.configuration.class=org.apache.uima.ducc.sm.config.ServiceManagerConfiguration
# This is the JMS endpoint name used for API messages received by the Service Manager.
ducc.sm.api.endpoint=ducc.sm.api
# This is the JMS message type used for API messages received by the Service Manager.
ducc.sm.api.endpoint.type=queue
# This is the JMS endpoint name used for state messages sent by the Service Manager.
ducc.sm.state.update.endpoint=ducc.sm.state
# This is the JMS message type used for state messages sent by the Service Manager.
ducc.sm.state.update.endpoint.type=topic
# Default pinger
# This is the name of the default UIMA-AS ping/monitor class. The default class issues
# get-meta to a service and uses JMX to fetch queue statistics for presentation in
# the Web Server.
# This name is either:
# - The fully qualified name of the class to use as the default UIMA-AS pinger. It may
# be necessary to include the class or jar file in the classpath used to start the SM.
# (The recommended way to do this is to add an entry to the ducc.local.jars property
# in ducc.properties.)
# - The name of a pinger registration file. This is the recommended way to
# provide installation-customized pingers. In short, it resides in ducc.properties
# and contains the full set of ping-related properties needed to run a pinger.
ducc.sm.default.monitor.class = org.apache.uima.ducc.sm.UimaAsPing
# This is the maximum number of consecutive failures of service instance initialization
# permitted before DUCC stops creating new instances. When this cap is hit the SM
# will disable autostart for the service. It may be overridden by the service
# registration's instance_failures_limit parameter.
ducc.sm.instance.failure.max 5
# backward compatibility
ducc.sm.instance.failure.limit ${ducc.sm.instance.failure.max}
# This specifies a window of time in minutes over which some number of service instance
# failures are tolerated. If the maximum number of tolerated failures is
# exceeded within this time window the Service Manager ceases to restart
# instances automatically. The maximum tolerated failures is defined in
# ducc.sm.instance.failure.max.
# This may be overridden by individual service pingers using the registration
# property instance_failures_window.
ducc.sm.instance.failure.window 30
# max consecutive instance init failures before we stop trying to start things
ducc.sm.init.failure.limit 1
# This is the time, in milliseconds, between pings by the Service Manager
# to each known, running service.
ducc.sm.meta.ping.rate = 60000
# This is the number of consecutive pings that may be missed before a
# service is considered unavailable.
ducc.sm.meta.ping.stability = 10
# This is the time in milliseconds the SM waits for a response to a ping. If the service does
# not respond within this time the ping is accounted for as a "missed" ping.
ducc.sm.meta.ping.timeout = 15000
# This is the HTTP port used by the Service Manager to field requests from the CLI / API.
# *****
# NOTE: for backward CLI / API compatibility (2.2.0 and before) this port is the same as
# ducc.orchestrator.http.port. This entry can otherwise be deleted post-2.2.0 as it is no
# longer used by the current CLI / API.
# *****
ducc.sm.http.port=${ducc.orchestrator.http.port}
# This is the node where the Service Manager runs. It MUST be ${ducc.head}.
ducc.sm.http.node=${ducc.head}
# This is the length of time, in milliseconds, that the SM allows a service to remain alive after
# all jobs that reference it have exited. If no new job referencing it enters the system before this
# time has expired, the SM stops the service.
ducc.sm.default.linger=300000
# +==================================================================================================+
# | Orchestrator |
# +==================================================================================================+
# This is the name of the pluggable java class used to implement the DUCC Orchestrator.
ducc.orchestrator.configuration.class=org.apache.uima.ducc.orchestrator.config.OrchestratorConfiguration
# This indicates the level of recovery to be taken on restarting a
# system. There are two levels of startup:
# [cold] All reservations are canceled, all currently running
# jobs (if any) are terminated. All services are terminated. The
# system starts with no jobs, reservations, or services active.
# [warm] All active work is continued.
#------------------------------------------------------------------------------
# cold, // Recover: All active are forced to Completed JD host: employ new
# warm, // Recover: All Jobs+Services+Reservations (default) JD host: employ current
ducc.orchestrator.start.type=warm
#------------------------------------------------------------------------------
# This is the name of the JMS endpoint through which the Orchestrator broadcasts its
# state messages. These messages include full job information and can be relatively
# large, though they are now compressed.
ducc.orchestrator.state.update.endpoint=ducc.orchestrator.state
# This is the JMS endpoint type used for the state messages sent by the Orchestrator.
ducc.orchestrator.state.update.endpoint.type=topic
# The interval in milliseconds between Orchestrator publications of its state.
ducc.orchestrator.state.publish.rate=10000
#------------------------------------------------------------------------------
# How long between maintenance cycles (in milliseconds)
# Each maintenance cycle the orchestrator removes obsolete JD queues (MqReaper)
# and performs health related activities (HealthMonitor) including: capping JPs
# for Jobs that have too many initialization failures and terminating Jobs whose
# JDs have failed.
ducc.orchestrator.maintenance.rate=60000
#------------------------------------------------------------------------------
# This is the HTTP port used by the Orchestrator to field requests from the CLI / API.
ducc.orchestrator.http.port=19988
# Node where Orchestrator is running. It MUST be ${ducc.head}.
ducc.orchestrator.http.node=${ducc.head}
#------------------------------------------------------------------------------
# Specify if reserving an entire machine is allowed
# Allowing could be a bad idea when a cluster has very few, very large machines.
ducc.orchestrator.unmanaged.reservations.accepted=true
#------------------------------------------------------------------------------
# This is the name of the JMS endpoint through which the daemons broadcast state changes
ducc.daemons.state.change.endpoint=ducc.daemons.state.change
# This is the JMS endpoint type used for the state change messages sent by the daemons.
ducc.daemons.state.change.endpoint.type=queue
# +==================================================================================================+
# | Resource Manager (aka Scheduler) |
# +==================================================================================================+
# This is the name of the pluggable java class used to implement the DUCC Resource
# Manager.
ducc.rm.configuration.class=org.apache.uima.ducc.rm.config.ResourceManagerConfiguration
# This is the name of the JMS endpoint through which the Resource Manager broadcasts its
# state.
ducc.rm.state.update.endpoint=ducc.rm.state
# This is the JMS endpoint type used for state messages sent by the Resource Manager.
ducc.rm.state.update.endpoint.type=topic
# This specifies the frequency of RM schedules, relative to the number of Orchestrator publications.
# If the value is set to 1, RM runs and publishes a schedule immediately on receipt of OR state.
# If set to some number N, RM runs a schedule after receipt of every N Orchestrator publications.
ducc.rm.state.publish.ratio = 1
# This specifies the maximum non-preemptable shares any user may be awarded, in GB. If not configured,
# there is no maximum enforced. This can be overridden on a per-user basis in the user registry.
#ducc.rm.global_allotment = 360
# The share quantum is the smallest amount of RAM that is schedulable for jobs, in GB.
# Jobs are scheduled based entirely on their memory requirements. Memory is allocated in
# multiples of the share quantum.
ducc.rm.share.quantum = 1
# The component that implements the scheduling algorithm is pluggable. This specifies the
# name of that class.
ducc.rm.scheduler = org.apache.uima.ducc.rm.scheduler.NodepoolScheduler
# File defining the scheduler classes - found in DUCC_HOME/resources
ducc.rm.class.definitions = ducc.classes
# File defining the user registry. Optional, need not exist. Found in DUCC_HOME/resources.
# The registry is used only to override the global allotments. The registry entries may also
# be placed in the ducc.classes file if desired.
ducc.rm.user.registry = ducc.users
# default memory, in GB, if not specified
ducc.rm.default.memory = 4
# Number of node metrics heartbeats to wait for before rm starts up.
# During DUCC initialization the Resource Manager must wait some period of time for
# all the nodes in the cluster to check-in via their "heartbeats". If the RM were to start
# scheduling too soon there would be a period of significant "churn" as the perceived cluster
# configuration changes rapidly. As well, it would be impossible to recover work in a warm
# or hot start if the affected nodes had not yet checked in.
# The init.stability property indicates how many heartbeat intervals the RM must wait before
# it starts scheduling after initialization.
ducc.rm.init.stability = 2
# Number of missed node metrics updates to consider node down.
# The RM receives regular "heartbeats" from the DUCC agents in order to know what
# nodes are available for scheduling. The node.stability property configures the number of
# consecutive heartbeats that may be missed before the Resource Manager considers the
# node to be inoperative.
# If a node becomes inoperative, the Resource Manager deallocates all processes on that
# node and attempts to reallocate them on other nodes. The node is marked offline and is
# unusable until its heartbeats start up again.
# The default configuration declares the agent heartbeats to occur at 30 second intervals.
# (see ducc.agent.node.metrics.publish.rate)
# Therefore heartbeats must be missed for 3 minutes before the Resource Manager takes
# corrective action.
ducc.rm.node.stability = 6
# Which policy to use when shrinking/evicting shares - alternatively, SHRINK_BY_MACHINE.
# The eviction.policy is a heuristic to choose which processes of a job to preempt because of
# competition from other jobs.
# The SHRINK_BY_INVESTMENT (default) policy attempts to preempt processes such that the
# least amount of work is lost. It chooses candidates for eviction in order of:
# - Processes still initializing, with the smallest time spent in the initializing step.
# - Processes whose currently active work items have been executing for the shortest time.
# The SHRINK_BY_MACHINE policy attempts to preempt processes so as to minimize
# fragmentation on machines with large memories that can contain multiple job processes.
# No consideration of execution time or initialization time is made.
ducc.rm.eviction.policy = SHRINK_BY_INVESTMENT
# Max nodes to initially allocate until init is complete.
# The type of jobs supported by DUCC generally have very long and often fragile
# initialization periods. Errors in the applications and other problems such as missing or
# errant services can cause processes to fail during this phase.
# To avoid preempting running jobs and allocating a large number of resources to jobs only
# to fail during initialization, the Resource Manager schedules a small number of processes
# until it is determined that the initialization phase will succeed.
# The initialization.cap determines the maximum number of processes allocated to a job
# until at least one process successfully initializes. Once any process initializes the Resource
# Manager will proceed to allocate the job its full fair share of processes.
# The initialization cap can be overridden on a class basis by configuration via ducc.classes.
ducc.rm.initialization.cap = 1
# When true, jobs expand not all at once after init, but a bit slower, doubling each epoch
# until max fair-share is set. If false, jobs increase immediately to their fair share,
# at the cost of mass evictions.
# Expand.by.doubling can be overridden on a class basis by configuration via ducc.classes.
ducc.rm.expand.by.doubling = true
# Predict when a job will end and avoid expanding if not needed.
# Because initialization time may be very long, it may be the case that a job that might be
# eligible for expansion will be able to complete in the currently assigned shares before any
# new processes are able to complete their initialization. In this case expansion results in
# waste of resources and potential eviction of processes that need not be evicted.
# The Resource Manager monitors the rate of task completion and attempts to predict the
# maximum number of processes that will be needed at a time in the future based on the
# known process initialization time. If it is determined that expansion is unnecessary then it
# is not done for the job.
# Prediction can be overridden on a class basis by configuration via ducc.classes.
ducc.rm.prediction = true
# Add this fudge factor (milliseconds) to the expansion target when using prediction.
# When ducc.rm.prediction is enabled, the known initialization time of a job's processes plus
# some "fudge" factor is used to predict the number of future resources needed. The "fudge"
# is specified in milliseconds.
# The default "fudge" is very conservative. Experience and site policy should be used to set a
# more practical number.
# Prediction.fudge can be overridden on a class basis by configuration via ducc.classes.
ducc.rm.prediction.fudge = 120000
# What is minimum number of processes for a job before we do defrag? If a job has less than
# this amount RM may attempt defragmentation to bring the processes up to this value.
# If enabled, limited defragmentation of resources is performed by the Resource Manager to
# create sufficient space to schedule work that has insufficient resources (new jobs, for
# example). The term "insufficient" is defined as "needing more processes than the
# defragmentation threshold, but currently having fewer processes than the defragmentation
# threshold." These are called "needy" jobs. Additionally, the Resource Manager
# will never evict processes from "needy" jobs for the purpose of defragmentation.
# This property allows installations to customize the value used to determine if a
# job is "needy". Jobs with fewer processes than this are potentially needy, and
# jobs with more processes are never needy.
ducc.rm.fragmentation.threshold = 8
# This is the JMS endpoint used for RM administrative requests.
ducc.rm.admin.endpoint = ducc.rm.admin.channel
# This is the JMS endpoint type used for RM administrative requests.
ducc.rm.admin.endpoint.type = queue
# This is the JMS endpoint used for RM administrative requests.
ducc.rm.via.or.admin.endpoint = ducc.rm.via.or.admin.channel
# This is the JMS endpoint type used for RM administrative requests.
ducc.rm.via.or.admin.endpoint.type = queue
# Unmanaged Reservations are usually assigned to machines whose size exactly matches the
# requested size, after rounding up to a multiple of the node pool's quantum.
# When this property is positive, machines up to this many GB larger than the rounded up
# size will be considered.
ducc.rm.reserve_overage = 0
# +==================================================================================================+
# | Agents |
# +==================================================================================================+
# This is the name of the pluggable java class used to implement the DUCC Agents.
ducc.agent.configuration.class=org.apache.uima.ducc.agent.config.AgentConfiguration
# This is the JMS endpoint through which agents receive state from the Process Manager.
ducc.agent.request.endpoint=ducc.agent
# This is the JMS endpoint type used for state messages sent by the Process Manager.
ducc.agent.request.endpoint.type=topic
# This is the JMS endpoint used to communicate
# from the managed process (Job Process) to the Agent.
ducc.agent.managed.process.state.update.endpoint=ducc.managed.process.state.update
# This is the JMS endpoint type used to communicate
# from the managed process (Job Process) to the Agent.
ducc.agent.managed.process.state.update.endpoint.type=socket
# Endpoint parameters which are transport specific.
# For socket transport params are:
# - transferExchange=true - include Camel Exchange wrapper in a message
# - sync=false - use socket transport for one-way messaging (no replies needed)
ducc.agent.managed.process.state.update.endpoint.params=transferExchange=true&sync=false
# ***** Note: the Resource Manager uses the data in the node metrics for scheduling.
# The interval in milliseconds between node metric publications.
# Every agent publishes its updates at this rate. On large clusters, a high rate (small
# interval) can be a burden on the network.
ducc.agent.node.metrics.publish.rate=30000
# This is the JMS endpoint used to send node metrics updates to listeners. Listeners
# are usually the Resource Manager and Web Server. These messages serve as node
# "heartbeats". As well, the node metrics heartbeats contain the amount of RAM on the node
# and the number of processors.
ducc.agent.node.metrics.endpoint=ducc.node.metrics
# This is the JMS endpoint type used to send node metrics updates from the agents.
ducc.agent.node.metrics.endpoint.type=topic
# The interval in milliseconds between node inventory publications.
# If the inventory has not changed since the last update the agent bypasses sending the
# update, up to a maximum of ducc.agent.node.inventory.publish.rate.skip times.
ducc.agent.node.inventory.publish.rate=10000
# This is the number of times the agent will bypass publishing its node inventory if the
# inventory has not changed.
ducc.agent.node.inventory.publish.rate.skip=3
# This is the JMS endpoint used to send node inventory messages to listeners. Listeners are
# usually the Orchestrator and Web Server. Information in these messages include a map of
# processes being managed on the node.
ducc.agent.node.inventory.endpoint=ducc.node.inventory
# This is the JMS endpoint type used to send node inventory updates from the agents.
ducc.agent.node.inventory.endpoint.type=topic
# ducc.agent.launcher.thread.pool.size - Deprecated
#ducc.agent.launcher.thread.pool.size=10
# enable/disable use of ducc_ling
# ducc.agent.launcher.use.ducc_spawn: true, use ducc_ling. Default: false
ducc.agent.launcher.use.ducc_spawn=true
# Specify location of ducc_ling in the filesystem.
# This property specifies the full path to the ducc_ling utility. During installation ducc_ling
# is normally moved to local disk and given setuid-root privileges. Use this property to tell
# the DUCC agents the location of the installed ducc_ling. The default location is within
# an architecture dependent subdirectory of DUCC_HOME/admin.
# The architecture is derived from the JRE property os.arch. During DUCC installation
# the ducc_ling utility is compiled for the architecture of the host where DUCC is installed.
# In heterogeneous clusters, the system administrator should run the utility build_duccling
# once on a machine of each architecture to ensure this utility gets correctly installed.
ducc.agent.launcher.ducc_spawn_path=${DUCC_HOME}/admin/${os.arch}/ducc_ling
# Max amount of time (in millis) agent allows the process to stop before issuing kill -9.
# This property specifies the time, in milliseconds, the agent should wait before forcibly
# terminating a job process (JP) after an attempted graceful shutdown. If the child process
# does not terminate in the specified time, it is forcibly terminated with kill -9.
# This type of stop can occur because of preemption or system shutdown.
ducc.agent.launcher.process.stop.timeout=60000
# Max time in millis allowed for AE initialization. Default 2 hours 7200000. - Deprecated
#ducc.agent.launcher.process.init.timeout=7200000
# Exclude the following user ids while detecting rogue processes
# The DUCC Agents scan nodes for processes that should not be running; for example,
# a job may have left a 'rogue' process alive when it exits, or a user may log in to a node
# unexpectedly. These processes are reported to the administrators via the webserver for
# possible action.
# This configuration parameter enumerates userids which are ignored by the rogue-process scan.
# default = root,postfix,ntp,nobody,daemon,100
ducc.agent.rogue.process.user.exclusion.filter=
# Max UID reserved by OS. This is used to detect rogue processes and to report
# available memory on a node.
# The ducc.agent.node.metrics.sys.uid.max property is dropped in favor of
# ducc.agent.rogue.process.sys.uid.max, which seems more descriptive
#
# ducc.agent.node.metrics.sys.uid.max=500
ducc.agent.rogue.process.sys.uid.max=500
# Exclude the following processes while detecting rogue processes
# The DUCC Agents scan nodes for processes that should not be running; for example,
# a job may have left a 'rogue' process alive when it exits, or a user may log in to a node
# unexpectedly. These processes are reported to the administrators via the webserver for
# possible action.
# This configuration parameter enumerates processes by name which are ignored by the
# rogue process detector.
# default = sshd:,-bash,-sh,/bin/sh,/bin/bash,grep,ps
ducc.agent.rogue.process.exclusion.filter=sshd:,-bash,-sh,/bin/sh,/bin/bash,grep,ps
# Fudge Factor (in terms of percentage) that agent uses to multiply a share size when
# determining if a JP exceeds its allotted memory, which is calculated as follows
# (fudge factor/100)*share size + share size. If the JP's RSS exceeds this number, the agent
# kills the process.
# The DUCC agent monitors the size of the resident memory of its spawned processes. If a
# process exceeds its declared memory size by any significant amount it is terminated and
# a ShareSizeExceeded message is sent. The Job Driver counts this towards the maximum
# errors for the job and will eventually terminate the job if excessive such errors occur.
# This property defines the percentage over the declared memory size that a process is
# allowed to grow to before being terminated.
# To disable this feature, set the value to -1.
ducc.agent.share.size.fudge.factor=5
# Enable/Disable CGroup support.
# If CGroups are not installed on a specific machine, this is ignored.
# With CGroups the RSS for a managed process (plus any children processes it may spawn) is
# limited to the allocated share size. Additional memory use goes to swap space. DUCC
# monitors and limits swap use to the same proportion of total swap space as allocated
# share size is to total RAM. If a process exceeds its allowed swap space it is terminated
# and a ShareSizeExceeded message is sent to the Job Driver.
# Nodes not using CGroups fall back to the ducc.agent.share.size.fudge.factor.
ducc.agent.launcher.cgroups.enable=true
# Define location of cgroups utils like cgexec. For mixed clusters with different
# flavors of unix, the utilities may be in different places. In such case, add
# a comma separated list of paths like this:
# ducc.agent.launcher.cgroups.utils.dir=/usr/bin,<OS 1 path>,<OS 2 path>
ducc.agent.launcher.cgroups.utils.dir=/usr/bin,/bin
# Set cgroup memory.swappiness
ducc.agent.launcher.cgroups.swappiness=10
# Number of retries to use when cgcreate fails
ducc.agent.launcher.cgroups.max.retry.count=1
# Amount by which to increase delay (in msecs) between each retry
# Defines a delay factor in millis an agent will use to determine how long it
# should sleep between cgroup create retries. Each time cgcreate fails
# an agent will increment the delay by adding the delay factor to a previous
# delay. If the delay factor is 2000, on first cgcreate failure an agent will
# wait 2000 ms, on second failure the wait time will be 4000, on third 6000, and
# so on.
ducc.agent.launcher.cgroups.retry.delay.factor=2000
# Exclusion file to enable node based exclusion for cgroups and aps
# syntax: <node>=cgroups,ap
# the above will exclude node from using cgroups and/or prevent deployment of APs
ducc.agent.exclusion.file=${DUCC_HOME}/resources/exclusion.nodes
# Define script which will collect total swap used by a given process. This
# script is launched by an agent via duccling and running as the owner
# of the process.
ducc.agent.swap.usage.script=${DUCC_HOME}/admin/ducc_get_process_swap_usage.sh
# Uncomment the following line to support auto reaping of rogue processes by Ducc's Agent
# ducc.agent.rogue.process.reaper.script=${DUCC_HOME}/admin/ducc_reap_rogues.sh
# +==================================================================================================+
# | Process Manager |
# +==================================================================================================+
# This is the name of the pluggable java class used to implement the DUCC Process Manager.
ducc.pm.configuration.class=org.apache.uima.ducc.pm.config.ProcessManagerConfiguration
# This is the endpoint through which the process manager receives state from the Orchestrator.
ducc.pm.request.endpoint=ducc.pm
# This is the JMS endpoint type used for state messages sent by the Orchestrator.
ducc.pm.request.endpoint.type=queue
# This is the endpoint through which process manager sends its heartbeat. The main receiver
# is the Web Server for its daemon status page.
ducc.pm.state.update.endpoint=ducc.pm.state
# This is the JMS endpoint type used for process manager heartbeats. The primary receiver
# is the Web Server for its daemon status page.
ducc.pm.state.update.endpoint.type=topic
# The interval in milliseconds between process manager heartbeat publications.
ducc.pm.state.publish.rate=15000
# +==================================================================================================+
# | Job Process |
# +==================================================================================================+
# This is the name of the pluggable java class that implements
# the shell for Agent launched user processes.
ducc.uima-as.configuration.class=org.apache.uima.ducc.transport.configuration.jp.JobProcessConfiguration
ducc.job-process.configuration.class=org.apache.uima.ducc.transport.configuration.jp.JobProcessConfiguration
ducc.service.configuration.class=org.apache.uima.ducc.transport.configuration.service.ServiceConfiguration
# This is the endpoint through which job processes (JPs) receive messages from the Agents.
ducc.uima-as.endpoint=ducc.job.managed.service
# This is the JMS endpoint type used for messages sent to the JPs from the Agents.
ducc.uima-as.endpoint.type=socket
# Endpoint parameters which are transport specific.
# For socket transport params are:
# - transferExchange=true - include Camel Exchange wrapper in a message
# - sync=false - use socket transport for one-way messaging (no replies needed)
ducc.uima-as.endpoint.params=transferExchange=true&sync=false
# saxon8.jar - Saxon is an XSLT and XQuery processor. It is used by UIMA-AS
# when deploying a service.
ducc.uima-as.saxon.jar.path=file:${DUCC_HOME}/apache-uima/saxon/saxon8.jar
# dd2spring.xsl - contains a set of rules used when transforming
# UIMA-AS deployment descriptor into SpringFramework's
# context file used to deploy UIMA-AS based service.
ducc.uima-as.dd2spring.xsl.path=${DUCC_HOME}/apache-uima/bin/dd2spring.xsl
# Maximum amount of time to wait for a response from the JD. This value
# is used by the JP when sending requests to the JD.
ducc.process.request.timeout=30000
# Define process container class for DD jobs to instantiate and invoke via reflection.
# The container provides classpath isolation for user defined analytics.
# The container is instantiated with classes from a System classloader.
ducc.process.uima.as.container.class = org.apache.uima.ducc.user.jp.UimaASProcessContainer
# Define process container class for non-DD jobs to instantiate and invoke via reflection.
# The container provides classpath isolation for user defined analytics.
# The container is instantiated with classes from a System classloader.
ducc.process.uima.container.class = org.apache.uima.ducc.user.jp.UimaProcessContainer
# Define the sleep time (millis) for JP to use when JD sends empty CAS. In this case the
# JD's CR has processed its collection. The JP threads need to slow down sending
# requests
ducc.process.thread.sleep.time=60000
# Custom Flow Controller to use for Ducc Job Processes that don't provide a process_DD descriptor
ducc.flow-controller.specifier=org.apache.uima.ducc.FlowController
# +==================================================================================================+
# | Database |
# +==================================================================================================+
# Name of the database host(s) - initially disabled until database is created.
# Overridden in site.ducc.properties by ducc_post_install or db_create.
# This is the name of the host(s) where the database is run. It usually defaults to the
# same host as the ducc.head. Those knowledgeable of the database can install the
# database elsewhere. Use this parameter to specify that location(s).
# To disable use of the database, set this parameter to the string --disabled--
ducc.database.host.list = --disabled--
# Database JMX host. Default is to allow only local JMX access. Specify the real name for remote access.
ducc.database.jmx.host = localhost
# Database JMX port.
ducc.database.jmx.port = 7199
# If set to true, DUCC will start and stop the Cassandra database as part of its normal
# start/stop scripting.
ducc.database.automanage = true
# Path to Cassandra database home
ducc.database.home = ${DUCC_HOME}/cassandra-server
#-----------------------------------
# DATABASE MEMORY TUNING
# WARNING: if you're not sure what these do you should probably not change them as incorrect
# values can adversely affect the database, the database node, or both.
# Given that, if you're running on a small system, e.g. 8GB or less, you should probably
# set ducc.database.mem.heap to 1 or 2 GB and ducc.database.mem.new to 100M
#
# By default we allow cassandra to calculate these for us.
#
# IMPORTANT: if you set one of these you MUST set both, or cassandra will refuse to start.
#-----------------------------------
# Database xmx. If not set, cassandra will calculate it for you.
# This is the value used to set Xmx and Xms when the database starts. The
# Cassandra database makes an attempt to determine the best value of this. The
# default is one-half of real memory, up to a maximum of 8G. It is recommended that
# the default be used. However, small installations may reduce this to as little
# as 512M. Note that both Xmx and Xms are set.
#ducc.database.mem.heap = 4G
# Database setting for 'young' generation. If not set, cassandra will calculate it for you.
#ducc.database.mem.new = 100M
#-----------------------------------
# END OF DATABASE MEMORY TUNING
#-----------------------------------
# This class implements the persistence interface for the services registry
#ducc.service.persistence.impl = org.apache.uima.ducc.database.StateServicesDb
ducc.service.persistence.impl = org.apache.uima.ducc.common.persistence.services.StateServices
# This class implements the persistence interface for Job, Reservation, Service, and AP instances, plus
# the Orchestrator checkpoint.
#ducc.job.history.impl = org.apache.uima.ducc.database.HistoryManagerDb
ducc.job.history.impl = org.apache.uima.ducc.transport.event.common.history.HistoryPersistenceManager
#ducc.rm.persistence.impl = org.apache.uima.ducc.database.RmStatePersistence
ducc.rm.persistence.impl = org.apache.uima.ducc.common.persistence.rm.NullRmStatePersistence
# end database things -------------------------------------------------------------------------------