# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

########### These all have default values as shown

# We would specify the unique sink-id first
sinks:
  - file-sink
  - tmanager-sink
  - metricscache-sink

########### Now we would specify the detailed configuration for every unique sink
########### Syntax: sink-id: - option(s)

########### option class is required as we need to instantiate a new instance by reflection
########### option flush-frequency-ms is required to invoke flush() at interval
########### option sink-restart-attempts, representing # of times to restart a sink when it throws exceptions and dies.
########### If this option is missed, default value 0 would be supplied; negative value represents to restart it forever.

########### Other options would be constructed as an immutable map passed to IMetricsSink's init(Map conf) as argument,
########### We would be able to fetch value by conf.get(options), for instance:
########### We could get "org.apache.heron.metricsmgr.sink.FileSink" if conf.get("class") is called inside file-sink's instance

### Config for file-sink
file-sink:
  class: "org.apache.heron.metricsmgr.sink.FileSink"
  flush-frequency-ms: 60000  # 1 min
  sink-restart-attempts: -1  # Forever
  filename-output: "metrics.json"  # File for metrics to write to
  file-maximum: 5  # maximum number of files saved on disk

### Config for tmanager-sink
tmanager-sink:
  class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink"
  flush-frequency-ms: 60000
  sink-restart-attempts: -1  # Forever
  tmanager-location-check-interval-sec: 5
  tmanager-client:
    reconnect-interval-second: 5  # The re-connect interval to TManager from TManagerClient
    # The size of packets written to TManager will be determined by the minimum of: (a) time based (b) size based
    network-write-batch-size-bytes: 32768  # Size based, the maximum batch size in bytes to write to TManager
    network-write-batch-time-ms: 16  # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt
    network-read-batch-size-bytes: 32768  # Size based, the maximum batch size in bytes to read from TManager
    network-read-batch-time-ms: 16  # Time based, the maximum batch time in ms for Metrics Manager to read from TManager per attempt
    socket-send-buffer-size-bytes: 6553600  # The maximum socket's send buffer size in bytes
    socket-received-buffer-size-bytes: 8738000  # The maximum socket's received buffer size in bytes
  # How each metric is aggregated by TManager: SUM / AVG / LAST
  tmanager-metrics-type:
    "__emit-count": SUM
    "__execute-count": SUM
    "__fail-count": SUM
    "__ack-count": SUM
    "__complete-latency": AVG
    "__execute-latency": AVG
    "__process-latency": AVG
    "__jvm-uptime-secs": LAST
    "__jvm-process-cpu-load": LAST
    "__jvm-memory-used-mb": LAST
    "__jvm-memory-mb-total": LAST
    "__jvm-gc-collection-time-ms": LAST
    "__server/__time_spent_back_pressure_initiated": SUM
    "__time_spent_back_pressure_by_compid": SUM

### Config for metricscache-sink
metricscache-sink:
  class: "org.apache.heron.metricsmgr.sink.metricscache.MetricsCacheSink"
  flush-frequency-ms: 60000
  sink-restart-attempts: -1  # Forever
  metricscache-location-check-interval-sec: 5
  metricscache-client:
    reconnect-interval-second: 5  # The re-connect interval to MetricsCache from MetricsCacheClient
    # The size of packets written to MetricsCache will be determined by the minimum of: (a) time based (b) size based
    network-write-batch-size-bytes: 32768  # Size based, the maximum batch size in bytes to write to MetricsCache
    network-write-batch-time-ms: 16  # Time based, the maximum batch time in ms for Metrics Manager to write to MetricsCache per attempt
    network-read-batch-size-bytes: 32768  # Size based, the maximum batch size in bytes to read from MetricsCache
    network-read-batch-time-ms: 16  # Time based, the maximum batch time in ms for Metrics Manager to read from MetricsCache per attempt
    socket-send-buffer-size-bytes: 6553600  # The maximum socket's send buffer size in bytes
    socket-received-buffer-size-bytes: 8738000  # The maximum socket's received buffer size in bytes
  # How each metric is aggregated by MetricsCache: SUM / AVG / LAST
  metricscache-metrics-type:
    "__emit-count": SUM
    "__execute-count": SUM
    "__fail-count": SUM
    "__ack-count": SUM
    "__complete-latency": AVG
    "__execute-latency": AVG
    "__process-latency": AVG
    "__jvm-uptime-secs": LAST
    "__jvm-process-cpu-load": LAST
    "__jvm-memory-used-mb": LAST
    "__jvm-memory-mb-total": LAST
    "__jvm-gc-collection-time-ms": LAST
    "__server/__time_spent_back_pressure_initiated": SUM
    "__time_spent_back_pressure_by_compid": SUM

### Config for prometheus-sink
# prometheus-sink:
#   class: "org.apache.heron.metricsmgr.sink.PrometheusSink"
#   port: 8080  # The port on which to run (either port or port-file are mandatory)
#   path: /metrics  # The path on which to publish the metrics (mandatory)
#   flat-metrics: true  # By default the web-sink will publish a flat "name -> value" json map
#   include-topology-name: true  # Include topology name in metric name (default false)
#   metrics-cache-max-size: 1000000  # Max number of metrics cached and published (default 1000000)
#   metrics-cache-ttl-sec: 600  # Time in seconds after which a metric that was collected will stop being published (default 600)
#   rules:
#     # "__jvm-peak-usage/G1-Survivor-Space-committed": "9",
#     - pattern: __jvm-(.+)/(.+)
#       name: jvm_$1_$2
#       attrNameSnakeCase: true
#       type: COUNTER
#     # "__execute-time-ns/pulsar-prod-4/default": "418764",
#     - pattern: __(?!jvm-+)(.+-count|.+-latency|.+-time-ns)/(.+)/(.+)
#       name: $1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         dest_component: "$2"
#         context: "$3"
#     # "__execute-time-ns/pulsar-prod-4": "418764",
#     - pattern: __(?!jvm-+)(.+-count|.+-latency|.+-time-ns)/(.+)
#       name: $1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         context: "$2"
#     # StreamManager
#     # "__client_stmgr-17/__bytes_to_stmgrs": "7841039",
#     - pattern: __(client_stmgr-.+)/__(.+_to_stmgrs)
#       name: $2
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         dest_component: "$1"
#     # "__connection_buffer_by_instanceid/container_1_pulsar-prod-9_201/bytes": "0.000000",
#     - pattern: __(connection_buffer_by_instanceid)/container_(.+)_(.+)/(.+)
#       name: $1_$4
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         dest_container: "$2"
#         dest_task: "$3"
#     # "__time_spent_back_pressure_by_compid/container_1_pulsar-prod-5_151": "0",
#     - pattern: __(time_spent_back_pressure_by_compid)/container_(.+)_(.+)
#       name: $1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         dest_container: "$2"
#         dest_task: "$3"
#     # PulsarSpoutMetrics of PulsarSpout 'PulsarSpoutMetrics/pulsar-prod-4-0/consumerThroughput'
#     - pattern: PulsarSpout/(.+)/(.+)
#       name: pulsar_spout_$2
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         component: "$1"
#     - pattern: PulsarBolt/(.+)/(.+)
#       name: pulsar_bolt_$2
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         component: "$1"
#     # name: "kafkaConsumer-request-size-max/consumer-node-metrics/client-id-spout/node-id-node-1"
#     - pattern: kafkaConsumer-(.+)/consumer-(node)-metrics/client-id-(.+)/node-id-(.+)
#       name: kafka_consumer_$2_$1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         client_id: "$3"
#         node_id: "$4"
#     # name: "kafkaConsumer-commit-rate/consumer-coordinator-metrics/client-id-spout"
#     - pattern: kafkaConsumer-(.+)/consumer-(coordinator)-metrics/client-id-(.+)
#       name: kafka_consumer_$2_$1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         client_id: "$3"
#     # name: "kafkaConsumer-records-lag-max/consumer-fetch-manager-metrics/client-id-spout/topic-nginx-lad-es/partition-1"
#     - pattern: kafkaConsumer-(.+)/consumer-(fetch-manager)-metrics/client-id-(.+)/topic-(.+)/partition-(.+)
#       name: kafka_consumer_$2_$1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         client_id: "$3"
#         topic: "$4"
#         partition: "$5"
#     # name: "kafkaConsumer-records-per-request-avg/consumer-fetch-manager-metrics/client-id-spout/topic-nginx-adp-cms-api"
#     - pattern: kafkaConsumer-(.+)/consumer-(fetch-manager)-metrics/client-id-(.+)/topic-(.+)
#       name: kafka_consumer_$2_$1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         client_id: "$3"
#         topic: "$4"
#     # name: "kafkaConsumer-bytes-consumed-total/consumer-fetch-manager-metrics/client-id-consumer-1"
#     - pattern: kafkaConsumer-(.+)/consumer-(fetch-manager)-metrics/client-id-(.+)
#       name: kafka_consumer_$2_$1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         client_id: "$3"
#     - pattern: kafkaConsumer-(.+)/consumer-metrics/client-id-(.+)/node-id-(.+)
#       name: kafka_consumer_$1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         client_id: "$2"
#         node_id: "$3"
#     - pattern: kafkaConsumer-(.+)/consumer-metrics/client-id-(.+)
#       name: kafka_consumer_$1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         client_id: "$2"
#     - pattern: kafkaConsumer-(.+)/app-info/client-id-(.+)
#       name: kafka_consumer_$1
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         client_id: "$2"
#     # kafkaOffset of KafkaSpout 'kafkaOffset/topicName/partition_2/spoutLag'
#     - pattern: kafkaOffset/(.+)/partition_([0-9]+)/(.+)
#       name: kafka_offset_$3
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         topic: "$1"
#         partition: "$2"
#     # kafkaOffset of KafkaSpout 'kafkaOffset/topicName/totalSpoutLag'
#     - pattern: kafkaOffset/(.+)/(.+)
#       name: kafka_offset_$2
#       attrNameSnakeCase: true
#       type: COUNTER
#       labels:
#         topic: "$1"

### Config for graphite-sink
### Currently the graphite-sink is disabled
# graphite-sink:
#   class: "org.apache.heron.metricsmgr.sink.GraphiteSink"
#   flush-frequency-ms: 60000
#   graphite_host: "127.0.0.1"  # The host of graphite to be exported metrics to
#   graphite_port: 2004  # The port of graphite to be exported metrics to
#   metrics_prefix: "heron"  # The prefix of every metric
#   server_max_reconnect-attempts: 20  # The max reconnect attempts when failing to connect to graphite server