blob: 21add70bf645a5e8a60730c595fe87207f739b23 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
########### These all have default values as shown
# We would specify the unique sink-id first
# Empty sequence: no sink-ids are listed here.
# NOTE(review): presumably a sink section below only takes effect once its
# sink-id is added to this list — confirm against the metrics manager loader.
sinks: []
########### Now we would specify the detailed configuration for every unique sink
########### Syntax: sink-id: - option(s)
########### option class is required as we need to instantiate a new instance by reflection
########### option flush-frequency-ms is required to invoke flush() at interval
########### option sink-restart-attempts represents the number of times to restart a sink when it throws exceptions and dies.
########### If this option is missing, the default value 0 is supplied; a negative value means the sink is restarted forever.
########### Other options would be constructed as an immutable map passed to IMetricsSink's init(Map conf) as argument,
########### We would be able to fetch value by conf.get(options), for instance:
########### We could get "org.apache.heron.metricsmgr.sink.FileSink" if conf.get("class") is called inside file-sink's instance
### Config for file-sink
# Options for the sink with id "file-sink".
# Every option must be nested under the sink-id: written at column 0 they
# parse as top-level keys (leaving file-sink null) and collide with the
# identically named options of the other sinks in this file.
file-sink:
  class: "org.apache.heron.metricsmgr.sink.FileSink"
  flush-frequency-ms: 60000  # 1 min
  sink-restart-attempts: -1  # Forever
  filename-output: "metrics.json"  # File for metrics to write to
  file-maximum: 5  # Maximum number of files saved on disk
### Config for tmanager-sink
# Options for the sink with id "tmanager-sink".
# Options are nested under the sink-id, and the client / metrics-type options
# are nested one level further under their own keys, so that they do not
# become (duplicate) top-level keys.
tmanager-sink:
  class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink"
  flush-frequency-ms: 60000
  sink-restart-attempts: -1  # Forever
  tmanager-location-check-interval-sec: 5
  tmanager-client:
    # The re-connect interval to TManager from TManagerClient
    reconnect-interval-second: 5
    # The size of packets written to TManager will be determined by the
    # minimum of: (a) time based (b) size based
    # Size based: the maximum batch size in bytes to write to TManager
    network-write-batch-size-bytes: 32768
    # Time based: the maximum batch time in ms for Metrics Manager to write
    # to TManager per attempt
    network-write-batch-time-ms: 16
    # Size based: the maximum batch size in bytes to read from TManager
    network-read-batch-size-bytes: 32768
    # Time based: the maximum batch time in ms for Metrics Manager to read
    # from TManager per attempt
    network-read-batch-time-ms: 16
    # The maximum socket's send buffer size in bytes
    socket-send-buffer-size-bytes: 6553600
    # The maximum socket's received buffer size in bytes
    socket-received-buffer-size-bytes: 8738000
  # Maps each metric name to its type tag (SUM, AVG or LAST).
  tmanager-metrics-type:
    "__emit-count": SUM
    "__execute-count": SUM
    "__fail-count": SUM
    "__ack-count": SUM
    "__complete-latency": AVG
    "__execute-latency": AVG
    "__process-latency": AVG
    "__jvm-uptime-secs": LAST
    "__jvm-process-cpu-load": LAST
    "__jvm-memory-used-mb": LAST
    "__jvm-memory-mb-total": LAST
    "__jvm-gc-collection-time-ms": LAST
    "__server/__time_spent_back_pressure_initiated": SUM
    "__time_spent_back_pressure_by_compid": SUM
### Config for metricscache-sink
# Options for the sink with id "metricscache-sink".
# Options are nested under the sink-id, and the client / metrics-type options
# are nested one level further under their own keys, so that they do not
# become (duplicate) top-level keys.
metricscache-sink:
  class: "org.apache.heron.metricsmgr.sink.metricscache.MetricsCacheSink"
  flush-frequency-ms: 60000
  sink-restart-attempts: -1  # Forever
  metricscache-location-check-interval-sec: 5
  metricscache-client:
    # The re-connect interval, in seconds, of the metricscache client
    reconnect-interval-second: 5
    # The size of packets written by the client will be determined by the
    # minimum of: (a) time based (b) size based
    # Size based: the maximum batch size in bytes per write
    network-write-batch-size-bytes: 32768
    # Time based: the maximum batch time in ms for Metrics Manager to write
    # per attempt
    network-write-batch-time-ms: 16
    # Size based: the maximum batch size in bytes per read
    network-read-batch-size-bytes: 32768
    # Time based: the maximum batch time in ms for Metrics Manager to read
    # per attempt
    network-read-batch-time-ms: 16
    # The maximum socket's send buffer size in bytes
    socket-send-buffer-size-bytes: 6553600
    # The maximum socket's received buffer size in bytes
    socket-received-buffer-size-bytes: 8738000
  # Maps each metric name to its type tag (SUM, AVG or LAST).
  metricscache-metrics-type:
    "__emit-count": SUM
    "__execute-count": SUM
    "__fail-count": SUM
    "__ack-count": SUM
    "__complete-latency": AVG
    "__execute-latency": AVG
    "__process-latency": AVG
    "__jvm-uptime-secs": LAST
    "__jvm-process-cpu-load": LAST
    "__jvm-memory-used-mb": LAST
    "__jvm-memory-mb-total": LAST
    "__jvm-gc-collection-time-ms": LAST
    "__server/__time_spent_back_pressure_initiated": SUM
    "__time_spent_back_pressure_by_compid": SUM
### Config for prometheus-sink
# prometheus-sink:
# class: "org.apache.heron.metricsmgr.sink.PrometheusSink"
# port: 8080 # The port on which to run (either port or port-file are mandatory)
# path: /metrics # The path on which to publish the metrics (mandatory)
# flat-metrics: true # By default the web-sink will publish a flat "name -> value" json map
# include-topology-name: true # Include topology name in metric name (default false)
# metrics-cache-max-size: 1000000 # Max number of metrics cached and published (default 1000000)
# metrics-cache-ttl-sec: 600 # Time in seconds after which a metric that was collected will stop being published (default 600)
# rules:
# # "__jvm-peak-usage/G1-Survivor-Space-committed": "9",
# - pattern: __jvm-(.+)/(.+)
# name: jvm_$1_$2
# attrNameSnakeCase: true
# type: COUNTER
# # "__execute-time-ns/pulsar-prod-4/default": "418764",
# - pattern: __(?!jvm-+)(.+-count|.+-latency|.+-count|.+-time-ns)/(.+)/(.+)
# name: $1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# dest_component: "$2"
# context: "$3"
# # "__execute-time-ns/pulsar-prod-4": "418764",
# - pattern: __(?!jvm-+)(.+-count|.+-latency|.+-count|.+-time-ns)/(.+)
# name: $1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# context: "$2"
# # StreamManager
# # "__client_stmgr-17/__bytes_to_stmgrs": "7841039",
# - pattern: __(client_stmgr-.+)/__(.+_to_stmgrs)
# name: $2
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# dest_component: "$1"
# # "__connection_buffer_by_instanceid/container_1_pulsar-prod-9_201/bytes": "0.000000",
# - pattern: __(connection_buffer_by_instanceid)/container_(.+)_(.+)/(.+)
# name: $1_$4
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# dest_container: "$2"
# dest_task: "$3"
# # "__time_spent_back_pressure_by_compid/container_1_pulsar-prod-5_151": "0",
# - pattern: __(time_spent_back_pressure_by_compid)/container_(.+)_(.+)
# name: $1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# dest_container: "$2"
# dest_task: "$3"
# # PulsarSpoutMetrics of PulsarSpout 'PulsarSpoutMetrics/pulsar-prod-4-0/consumerThroughput'
# - pattern: PulsarSpout/(.+)/(.+)
# name: pulsar_spout_$2
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# component: "$1"
# - pattern: PulsarBolt/(.+)/(.+)
# name: pulsar_bolt_$2
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# component: "$1"
# # name: "kafkaConsumer-request-size-max/consumer-node-metrics/client-id-spout/node-id-node-1"
# - pattern: kafkaConsumer-(.+)/consumer-(node)-metrics/client-id-(.+)/node-id-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# node_id: "$4"
# # name: "kafkaConsumer-commit-rate/consumer-coordinator-metrics/client-id-spout"
# - pattern: kafkaConsumer-(.+)/consumer-(coordinator)-metrics/client-id-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# # name: "kafkaConsumer-records-lag-max/consumer-fetch-manager-metrics/client-id-spout/topic-nginx-lad-es/partition-1"
# - pattern: kafkaConsumer-(.+)/consumer-(fetch-manager)-metrics/client-id-(.+)/topic-(.+)/partition-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# topic: "$4"
# partition: "$5"
# # name: "kafkaConsumer-records-per-request-avg/consumer-fetch-manager-metrics/client-id-spout/topic-nginx-adp-cms-api"
# - pattern: kafkaConsumer-(.+)/consumer-(fetch-manager)-metrics/client-id-(.+)/topic-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# topic: "$4"
# # name: "kafkaConsumer-bytes-consumed-total/consumer-fetch-manager-metrics/client-id-consumer-1"
# - pattern: kafkaConsumer-(.+)/consumer-(fetch-manager)-metrics/client-id-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# - pattern: kafkaConsumer-(.+)/consumer-metrics/client-id-(.+)/node-id-(.+)
# name: kafka_consumer_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$2"
# node_id: "$3"
# - pattern: kafkaConsumer-(.+)/consumer-metrics/client-id-(.+)
# name: kafka_consumer_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$2"
# - pattern: kafkaConsumer-(.+)/app-info/client-id-(.+)
# name: kafka_consumer_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$2"
# # kafkaOffset of KafkaSpout 'kafkaOffset/topicName/partition_2/spoutLag'
# - pattern: kafkaOffset/(.+)/partition_([0-9]+)/(.+)
# name: kafka_offset_$3
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# topic: "$1"
# partition: "$2"
# # kafkaOffset of KafkaSpout 'kafkaOffset/topicName/totalSpoutLag'
# - pattern: kafkaOffset/(.+)/(.+)
# name: kafka_offset_$2
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# topic: "$1"
### Config for scribe-sink
# scribe-sink:
# class: "org.apache.heron.metricsmgr.sink.ScribeSink"
# flush-frequency-ms: 60000
# sink-restart-attempts: -1 # Forever
# scribe-host: "127.0.0.1" # The host of scribe to be exported metrics to
# scribe-port: 1463 # The port of scribe to be exported metrics to
# scribe-category: "scribe-category" # The category of the scribe to be exported metrics to
# service-namespace: "heron" # The service name of the metrics in scribe-category
# scribe-timeout-ms: 200 # The timeout in milliseconds for metrics manager to write metrics to scribe
# scribe-connect-server-attempts: 2 # The maximum retry attempts to connect to scribe server
# scribe-retry-attempts: 5 # The maximum retry attempts to write metrics to scribe
# scribe-retry-interval-ms: 100 # The interval to retry to write metrics to scribe
### Config for graphite-sink
### Currently the graphite-sink is disabled
# graphite-sink:
# class: "org.apache.heron.metricsmgr.sink.GraphiteSink"
# flush-frequency-ms: 60000
# graphite_host: "127.0.0.1" # The host of graphite to be exported metrics to
# graphite_port: 2004 # The port of graphite to be exported metrics to
# metrics_prefix: "heron" # The prefix of every metrics
# server_max_reconnect-attempts: 20 # The max reconnect attempts when failing to connect to graphite server
### Config for web-sink
### The web-sink publishes metrics as json on http://host:port/path
# web-sink:
# class: "org.apache.heron.metricsmgr.sink.WebSink"
# port: 8080 # The port on which to run (either port or port-file are mandatory)
# # port-file: metrics.port # Alternatively supply a file which contains the port (text file containing single integer)
# path: /metrics.json # The path on which to publish the metrics (mandatory)
# flat-metrics: true # By default the web-sink will publish a flat "name -> value" json map
# # setting flat-metrics to "false" will group metrics by metric source (default true)
# include-topology-name: false # Include topology name in metric name (default false)
# metrics-cache-max-size: 1000000 # Max number of metrics cached and published (default 1000000)
# metrics-cache-ttl-sec: 600 # Time in seconds after which a metric that was collected will stop being published (default 600)