// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// proto2 syntax: the messages in this file use `required` fields and an
// explicit field default.
syntax = "proto2";
package heron.proto.tmanager;
// Java code is generated into org.apache.heron.proto.tmanager, wrapped in
// the TopologyManager outer class.
option java_package = "org.apache.heron.proto.tmanager";
option java_outer_classname = "TopologyManager";
import "common.proto";
import "stats.proto";
import "physical_plan.proto";
// Where the tmanager of a topology can be reached: the topology it belongs
// to, a host, and the ports it exposes.
message TManagerLocation {
  // Name and id of the topology this location belongs to.
  required string topology_name = 1;
  required string topology_id = 2;

  // Host that the ports below are reached on.
  required string host = 3;

  // The port to talk to the tmanager for topology control actions
  // like Activate/DeActivate/Drain etc.
  required int32 controller_port = 4;

  // The port that different components of the topology use to talk to
  // the tmanager, like stmgr for getting pplans and metrics mgr for
  // sending stats.
  required int32 server_port = 5;

  // The port that is an http endpoint to publish stats about the topology.
  optional int32 stats_port = 6;
}
// Where the metricscache of a topology can be reached: the topology it
// belongs to, a host, and the ports it exposes.
message MetricsCacheLocation {
  // Name and id of the topology this location belongs to.
  required string topology_name = 1;
  required string topology_id = 2;

  // Host that the ports below are reached on.
  required string host = 3;

  // The port to talk to the metricscache for topology control actions.
  // Reserved for future use.
  required int32 controller_port = 4;

  // The port that different components of the topology use to talk to
  // the metricscache, like metrics mgr for sending stats.
  required int32 server_port = 5;

  // The port that is an http endpoint to publish stats about the topology.
  optional int32 stats_port = 6;
}
// interface called by the StreamManagers.
// Registration request in which a stream manager (stmgr) describes itself
// and the instances on its node.
message StMgrRegisterRequest {
  // Info about myself.
  required heron.proto.system.StMgr stmgr = 1;

  // All the instances that are on my node.
  repeated heron.proto.system.Instance instances = 2;
}
// Outcome of a stream manager registration, optionally carrying the
// physical plan.
message StMgrRegisterResponse {
  // What was the status of this registration request.
  required heron.proto.system.Status status = 1;

  // If the assignment is already known, it is sent here.
  optional heron.proto.system.PhysicalPlan pplan = 2;
}
// Heartbeat carrying a send time and stream manager stats.
message StMgrHeartbeatRequest {
  // What time the heartbeat was sent at.
  // NOTE(review): the time unit/epoch is not stated in this file - confirm.
  required int64 heartbeat_time = 1;

  // Any stats that nodemanager has.
  required heron.proto.system.StMgrStats stats = 2;
}
// Outcome of a stream manager heartbeat.
message StMgrHeartbeatResponse {
  // What was the status of this heartbeat request.
  required heron.proto.system.Status status = 1;
}
// interface called by the metric managers
// A single named metric value attributed to a component and instance.
// TODO: Rename this message to something else to avoid confusion with
// metrics::MetricDatum
message MetricDatum {
  // What is the name of the component,
  // like Tail-FlatMap or __stmgr__.
  required string component_name = 1;

  // In case of a regular instance, it is the instance's
  // instance_id. For stmgr it is the stmgr_id.
  required string instance_id = 2;

  // Metric name and its value; the value is carried as a string.
  required string name = 3;
  required string value = 4;

  // NOTE(review): the time unit/epoch is not stated in this file - confirm.
  required int64 timestamp = 5;
}
// An exception record aggregated over a metrics collection interval,
// attributed to a component, host and instance.
message TmanagerExceptionLog {
  // Source of exception.
  required string component_name = 1;

  // Current hostname.
  required string hostname = 2;

  // In case of a regular instance, it is the instance's
  // instance_id. For stmgr it is the stmgr_id.
  required string instance_id = 3;

  // TODO: Use heron.proto.system.metrics.ExceptionData instead of the
  // following 5 fields.

  // Stack trace of exception. First two lines of stack trace are used for
  // aggregating exceptions.
  required string stacktrace = 4;

  // Last time the exception occurred in the metrics collection interval.
  required string lasttime = 5;

  // First time the exception occurred in the metrics collection interval.
  required string firsttime = 6;

  // Number of times the exception occurred in the metrics collection
  // interval.
  required int32 count = 7;

  // Additional text logged with the exception.
  optional string logging = 8;
}
// A batch of metric data and exception logs published together.
message PublishMetrics {
  repeated MetricDatum metrics = 1;
  repeated TmanagerExceptionLog exceptions = 2;
}
// Interface called by the UI.
// TManager exposes a web endpoint: send a POST request with the protobuf
// as the data. The response will be a MetricResponse.
// An explicit time interval given by its start and end.
// NOTE(review): the time unit is not stated in this file - confirm.
message MetricInterval {
  required int64 start = 1;
  required int64 end = 2;
}
// Request for metrics of one component, optionally narrowed to specific
// instances, metric names and a time interval.
message MetricRequest {
  required string component_name = 1;

  // The instance ids to get the stats from.
  // If nothing is specified, we will get from
  // all the instances of the component name.
  repeated string instance_id = 2;

  // What set of metrics you are interested in.
  // Example is __emit-count/default
  repeated string metric = 3;

  // What is the time interval that you want the metrics for.
  // Clients can specify one in many ways.
  // 1. interval. Essentially seconds worth of metrics before the current
  //    time; -1/0 means everything.
  // 2. explicit_interval. An explicit interval.
  optional int64 interval = 4;
  optional MetricInterval explicit_interval = 5;

  // Do you want metrics broken down on a per minute basis?
  optional bool minutely = 6 [default = false];
}
// Response to a metric query: a status plus the metrics found, grouped
// per instance.
message MetricResponse {
  // One named metric, either as a single value or as a list of
  // per-interval values.
  message IndividualMetric {
    required string name = 1;
    optional string value = 2;

    // A metric value together with the interval it applies to.
    message IntervalValue {
      required string value = 1;
      required MetricInterval interval = 2;
    }
    repeated IntervalValue interval_values = 3;
  }

  // The metrics belonging to a single instance.
  message TaskMetric {
    required string instance_id = 1;
    repeated IndividualMetric metric = 2;
  }

  required heron.proto.system.Status status = 1;

  // The order is the same as the request.
  repeated TaskMetric metric = 2;

  // The interval we got the data for.
  // This may not be present in case status is NOTOK.
  optional int64 interval = 3;
}
// List of exceptions bucketed by metrics aggregation granularity (minute).
message ExceptionLogResponse {
  required heron.proto.system.Status status = 1;

  // List of exceptions.
  repeated TmanagerExceptionLog exceptions = 2;
}
// Request to fetch exceptions from the tmanager store of exceptions and
// metrics.
message ExceptionLogRequest {
  // TODO: Make this repeated so that a single request can send data for
  // multiple components.
  required string component_name = 1;

  // List of instances for the exception request. If instances are empty,
  // the exception log for all instances will be returned.
  repeated string instances = 2;
}
// Request to fetch stream managers registration status.
// Carries no fields.
message StmgrsRegistrationSummaryRequest {
}
// Lists of registered and absent stream managers.
// NOTE(review): presumably the reply to StmgrsRegistrationSummaryRequest -
// confirm against the caller.
message StmgrsRegistrationSummaryResponse {
// Registered stream managers, one string per stream manager.
repeated string registered_stmgrs = 1;
// Absent stream managers, one string per stream manager.
repeated string absent_stmgrs = 2;