blob: 6cac2f13c9e658ff504d444a9f0e9ada00996809 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
########### These all have default values as shown
# We would specify the unique sink-id first
# Ids of the enabled sinks; each id has its own configuration section in this file.
sinks:
  - file-sink
  - tmanager-sink
  - metricscache-sink
########### Now we would specify the detailed configuration for every unique sink
########### Syntax: sink-id: - option(s)
########### option class is required as we need to instantiate a new instance by reflection
########### option flush-frequency-ms is required to invoke flush() at interval
########### option sink-restart-attempts represents the # of times to restart a sink when it throws exceptions and dies.
########### If this option is omitted, the default value 0 is supplied; a negative value means restart it forever.
########### Other options would be constructed as an immutable map passed to IMetricsSink's init(Map conf) as argument,
########### We would be able to fetch value by conf.get(options), for instance:
########### We could get "org.apache.heron.metricsmgr.sink.FileSink" if conf.get("class") is called inside file-sink's instance
### Config for file-sink
file-sink:
  # All options below must be nested under the sink id; at column 0 they
  # would be parsed as unrelated top-level keys.
  class: "org.apache.heron.metricsmgr.sink.FileSink"  # Instantiated by reflection
  flush-frequency-ms: 60000  # 1 min
  sink-restart-attempts: -1  # Forever
  filename-output: "metrics.json"  # File for metrics to write to
  file-maximum: 5  # Maximum number of files saved on disk
### Config for tmanager-sink
tmanager-sink:
  # All options below must be nested under the sink id; at column 0 they
  # would be parsed as unrelated top-level keys.
  class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink"
  flush-frequency-ms: 60000  # 1 min
  sink-restart-attempts: -1  # Forever
  tmanager-location-check-interval-sec: 5
  # Options for the client connection to TManager
  tmanager-client:
    reconnect-interval-second: 5  # The re-connect interval to TManager from TManagerClient
    # The size of packets written to TManager will be determined by the minimum of: (a) time based (b) size based
    network-write-batch-size-bytes: 32768  # Size based, the maximum batch size in bytes to write to TManager
    network-write-batch-time-ms: 16  # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt
    network-read-batch-size-bytes: 32768  # Size based, the maximum batch size in bytes to read from TManager
    network-read-batch-time-ms: 16  # Time based, the maximum batch time in ms for Metrics Manager to read from TManager per attempt
    socket-send-buffer-size-bytes: 6553600  # The maximum socket's send buffer size in bytes
    socket-received-buffer-size-bytes: 8738000  # The maximum socket's received buffer size in bytes
  # Maps each metric name to its type (SUM, AVG or LAST)
  tmanager-metrics-type:
    "__emit-count": SUM
    "__execute-count": SUM
    "__fail-count": SUM
    "__ack-count": SUM
    "__complete-latency": AVG
    "__execute-latency": AVG
    "__process-latency": AVG
    "__jvm-uptime-secs": LAST
    "__jvm-process-cpu-load": LAST
    "__jvm-memory-used-mb": LAST
    "__jvm-memory-mb-total": LAST
    "__jvm-gc-collection-time-ms": LAST
    "__server/__time_spent_back_pressure_initiated": SUM
    "__time_spent_back_pressure_by_compid": SUM
### Config for metricscache-sink
metricscache-sink:
  # All options below must be nested under the sink id; at column 0 they
  # would be parsed as unrelated top-level keys.
  class: "org.apache.heron.metricsmgr.sink.metricscache.MetricsCacheSink"
  flush-frequency-ms: 60000  # 1 min
  sink-restart-attempts: -1  # Forever
  metricscache-location-check-interval-sec: 5
  # Options for the client connection to MetricsCache
  metricscache-client:
    reconnect-interval-second: 5  # The re-connect interval to MetricsCache
    # The size of packets written to MetricsCache will be determined by the minimum of: (a) time based (b) size based
    network-write-batch-size-bytes: 32768  # Size based, the maximum batch size in bytes to write to MetricsCache
    network-write-batch-time-ms: 16  # Time based, the maximum batch time in ms for Metrics Manager to write to MetricsCache per attempt
    network-read-batch-size-bytes: 32768  # Size based, the maximum batch size in bytes to read from MetricsCache
    network-read-batch-time-ms: 16  # Time based, the maximum batch time in ms for Metrics Manager to read from MetricsCache per attempt
    socket-send-buffer-size-bytes: 6553600  # The maximum socket's send buffer size in bytes
    socket-received-buffer-size-bytes: 8738000  # The maximum socket's received buffer size in bytes
  # Maps each metric name to its type (SUM, AVG or LAST)
  metricscache-metrics-type:
    "__emit-count": SUM
    "__execute-count": SUM
    "__fail-count": SUM
    "__ack-count": SUM
    "__complete-latency": AVG
    "__execute-latency": AVG
    "__process-latency": AVG
    "__jvm-uptime-secs": LAST
    "__jvm-process-cpu-load": LAST
    "__jvm-memory-used-mb": LAST
    "__jvm-memory-mb-total": LAST
    "__jvm-gc-collection-time-ms": LAST
    "__server/__time_spent_back_pressure_initiated": SUM
    "__time_spent_back_pressure_by_compid": SUM
### Config for prometheus-sink
# prometheus-sink:
# class: "org.apache.heron.metricsmgr.sink.PrometheusSink"
# port: 8080 # The port on which to run (either port or port-file are mandatory)
# path: /metrics # The path on which to publish the metrics (mandatory)
# flat-metrics: true # By default the web-sink will publish a flat "name -> value" json map
# include-topology-name: true # Include topology name in metric name (default false)
# metrics-cache-max-size: 1000000 # Max number of metrics cached and published (default 1000000)
# metrics-cache-ttl-sec: 600 # Time in seconds after which a metric that was collected will stop being published (default 600)
# rules:
# # "__jvm-peak-usage/G1-Survivor-Space-committed": "9",
# - pattern: __jvm-(.+)/(.+)
# name: jvm_$1_$2
# attrNameSnakeCase: true
# type: COUNTER
# # "__execute-time-ns/pulsar-prod-4/default": "418764",
# - pattern: __(?!jvm-+)(.+-count|.+-latency|.+-count|.+-time-ns)/(.+)/(.+)
# name: $1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# dest_component: "$2"
# context: "$3"
# # "__execute-time-ns/pulsar-prod-4": "418764",
# - pattern: __(?!jvm-+)(.+-count|.+-latency|.+-count|.+-time-ns)/(.+)
# name: $1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# context: "$2"
# # StreamManager
# # "__client_stmgr-17/__bytes_to_stmgrs": "7841039",
# - pattern: __(client_stmgr-.+)/__(.+_to_stmgrs)
# name: $2
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# dest_component: "$1"
# # "__connection_buffer_by_instanceid/container_1_pulsar-prod-9_201/bytes": "0.000000",
# - pattern: __(connection_buffer_by_instanceid)/container_(.+)_(.+)/(.+)
# name: $1_$4
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# dest_container: "$2"
# dest_task: "$3"
# # "__time_spent_back_pressure_by_compid/container_1_pulsar-prod-5_151": "0",
# - pattern: __(time_spent_back_pressure_by_compid)/container_(.+)_(.+)
# name: $1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# dest_container: "$2"
# dest_task: "$3"
# # PulsarSpoutMetrics of PulsarSpout 'PulsarSpoutMetrics/pulsar-prod-4-0/consumerThroughput'
# - pattern: PulsarSpout/(.+)/(.+)
# name: pulsar_spout_$2
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# component: "$1"
# - pattern: PulsarBolt/(.+)/(.+)
# name: pulsar_bolt_$2
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# component: "$1"
# # name: "kafkaConsumer-request-size-max/consumer-node-metrics/client-id-spout/node-id-node-1"
# - pattern: kafkaConsumer-(.+)/consumer-(node)-metrics/client-id-(.+)/node-id-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# node_id: "$4"
# # name: "kafkaConsumer-commit-rate/consumer-coordinator-metrics/client-id-spout"
# - pattern: kafkaConsumer-(.+)/consumer-(coordinator)-metrics/client-id-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# # name: "kafkaConsumer-records-lag-max/consumer-fetch-manager-metrics/client-id-spout/topic-nginx-lad-es/partition-1"
# - pattern: kafkaConsumer-(.+)/consumer-(fetch-manager)-metrics/client-id-(.+)/topic-(.+)/partition-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# topic: "$4"
# partition: "$5"
# # name: "kafkaConsumer-records-per-request-avg/consumer-fetch-manager-metrics/client-id-spout/topic-nginx-adp-cms-api"
# - pattern: kafkaConsumer-(.+)/consumer-(fetch-manager)-metrics/client-id-(.+)/topic-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# topic: "$4"
# # name: "kafkaConsumer-bytes-consumed-total/consumer-fetch-manager-metrics/client-id-consumer-1"
# - pattern: kafkaConsumer-(.+)/consumer-(fetch-manager)-metrics/client-id-(.+)
# name: kafka_consumer_$2_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$3"
# - pattern: kafkaConsumer-(.+)/consumer-metrics/client-id-(.+)/node-id-(.+)
# name: kafka_consumer_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$2"
# node_id: "$3"
# - pattern: kafkaConsumer-(.+)/consumer-metrics/client-id-(.+)
# name: kafka_consumer_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$2"
# - pattern: kafkaConsumer-(.+)/app-info/client-id-(.+)
# name: kafka_consumer_$1
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# client_id: "$2"
# # kafkaOffset of KafkaSpout 'kafkaOffset/topicName/partition_2/spoutLag'
# - pattern: kafkaOffset/(.+)/partition_([0-9]+)/(.+)
# name: kafka_offset_$3
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# topic: "$1"
# partition: "$2"
# # kafkaOffset of KafkaSpout 'kafkaOffset/topicName/totalSpoutLag'
# - pattern: kafkaOffset/(.+)/(.+)
# name: kafka_offset_$2
# attrNameSnakeCase: true
# type: COUNTER
# labels:
# topic: "$1"
### Config for graphite-sink
### Currently the graphite-sink is disabled
# graphite-sink:
# class: "org.apache.heron.metricsmgr.sink.GraphiteSink"
# flush-frequency-ms: 60000
# graphite_host: "127.0.0.1" # The host of graphite to be exported metrics to
# graphite_port: 2004 # The port of graphite to be exported metrics to
# metrics_prefix: "heron" # The prefix of every metrics
# server_max_reconnect-attempts: 20 # The max reconnect attempts when failing to connect to graphite server