// blob: 76b06bbf152a40402f6314347336742572ad3605
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mesos.internal.log;
option cc_enable_arenas = true;
// Represents a "promise" that a replica has made. A promise is
// *implicitly* valid for _all_ future actions that get performed on
// the replicated log (provided the action comes from the same
// proposer), until a new promise is made to a proposer with a higher
// proposal number. Each replica writes every promise it makes as a
// log record so that it can recover this information after a failure.
// TODO(benh): Does the promise actually need to be written to stable
// storage? Can we get away with looking at the last written action
// and using its promised value? In this case, what happens if we
// make a promise but don't receive an action from that coordinator?
message Promise {
  // Proposal number of the proposer that this promise was made to.
  required uint64 proposal = 1;
}
// Represents an "action" performed on the log. Each action has an
// associated position in the log. In addition, each action (i.e.,
// position) will have been "promised" to a specific proposer
// (implicitly or explicitly) and may have been "performed" from a
// specific proposer. An action may also be "learned" to have reached
// consensus. There are three types of possible actions that can be
// performed on the log: nop (no action), append, and truncate.
message Action {
  // Position in the log that this action is associated with.
  required uint64 position = 1;

  // Proposal number that this position has been promised to (either
  // implicitly or explicitly).
  required uint64 promised = 2;

  // Proposal number that the action was performed from, if the action
  // has been performed.
  optional uint64 performed = 3;

  // Whether the action has been learned to have reached consensus.
  optional bool learned = 4;

  // The kinds of action that can be performed on the log.
  enum Type {
    NOP = 1;
    APPEND = 2;
    TRUNCATE = 3;
  }

  // Payload for a nop (no action).
  message Nop {
    // True if the position was truncated.
    optional bool tombstone = 1;
  }

  // Payload for an append.
  message Append {
    // The data being appended.
    required bytes bytes = 1;

    // NOTE(review): presumably a checksum over 'bytes'; the algorithm is
    // not specified in this file -- confirm against the writer.
    optional bytes cksum = 2;
  }

  // Payload for a truncate.
  message Truncate {
    // All positions before and exclusive of 'to'.
    required uint64 to = 1;
  }

  // Set if performed is set.
  optional Type type = 5;

  // Per-type payloads. NOTE(review): presumably only the one matching
  // 'type' is expected to be set -- confirm against the replica code.
  optional Nop nop = 6;
  optional Append append = 7;
  optional Truncate truncate = 8;
}
// The metadata of a replica. It has to be persisted on the disk. We
// store the current status of the replica as well as the implicit
// promise that a replica has made. This message is intended to
// replace the old Promise message to support catch-up.
message Metadata {
  // The states a replica can be in.
  enum Status {
    // Normal voting member in Paxos group.
    VOTING = 1;

    // In the process of catching up.
    RECOVERING = 2;

    // The log has been initialized.
    STARTING = 3;

    // The log is empty and is not initialized.
    EMPTY = 4;
  }

  // Current status of the replica.
  required Status status = 1 [default = EMPTY];

  // The implicit promise that the replica has made.
  required uint64 promised = 2 [default = 0];
}
// Represents a log record written to the local filesystem by a
// replica. A log record may store a promise (DEPRECATED), an action
// or metadata (defined above).
message Record {
  // Discriminates what this record stores.
  enum Type {
    // DEPRECATED!
    PROMISE = 1;
    ACTION = 2;
    METADATA = 3;
  }

  // The kind of record.
  required Type type = 1;

  // DEPRECATED!
  optional Promise promise = 2;

  // The stored action, for records that hold an action.
  optional Action action = 3;

  // The stored replica metadata, for records that hold metadata.
  optional Metadata metadata = 4;
}
////////////////////////////////////////////////////
// Replicated log request/responses and messages. //
////////////////////////////////////////////////////
// Represents a "promise" request from a proposer with the specified
// 'proposal' to a replica. If the proposer is a coordinator, most
// such requests will occur after a coordinator has failed and a new
// coordinator is elected. In such a case, the position that the
// coordinator is asking the replica to promise is implicitly *all*
// positions for which the replica has made no promises (thus the
// 'position' field is not used). In other instances, however, a proposer
// might be explicitly trying to request that a replica promise a
// specific position in the log (such as when trying to fill holes
// discovered during a client read), and then the 'position' field
// will be present.
message PromiseRequest {
  // Proposal number of the proposer sending this request.
  required uint64 proposal = 1;

  // Present only when the proposer explicitly asks the replica to
  // promise one specific position in the log (e.g., when filling holes
  // discovered during a client read).
  optional uint64 position = 2;
}
// Represents a promise response corresponding to a promise request.
// The kind of the response is given by the "type" field:
//
// 1. IGNORED: The recipient of the promise request was not in VOTING
// state, so it ignored the request.
// 2. REJECT: The recipient of the proposal has already promised a
// proposer with a higher proposal number. This is called a
// "NACK" in the code.
// 3. ACCEPT: The promise request was accepted.
//
// Before 0.26, we only sent responses for cases 2 and 3, so the
// 'okay' field was used to distinguish these responses. For backward
// compatibility, we continue setting 'okay' to false for both cases 1
// and 2; this means old masters will treat IGNORED as a NACK: this
// might result in demoting the current coordinator, but that should
// be tolerable. TODO(neilc): Remove 'okay' in 0.27.
//
// The 'proposal' is either the aforementioned higher proposal number
// (for case 2), or the corresponding request's proposal number (for
// cases 1 and 3). The replica either sends back the highest position
// it has recorded in the log (using the 'position' field) or the
// specific action (if any) it has at the position requested in
// PromiseRequest (using the 'action' field).
message PromiseResponse {
  // The kind of response.
  enum Type {
    ACCEPT = 1;
    REJECT = 2;

    // NOTE: Change in tense here is to avoid name collisions with
    // Windows headers.
    IGNORED = 3;
  }

  // DEPRECATED
  // Still set for backward compatibility; it is false for both IGNORED
  // and REJECT responses.
  required bool okay = 1;

  // The kind of this response.
  optional Type type = 5;

  // For REJECT, the higher proposal number that was already promised;
  // for IGNORED and ACCEPT, the proposal number of the request.
  required uint64 proposal = 2;

  // The highest position the replica has recorded in the log.
  optional uint64 position = 4;

  // The action (if any) the replica has at the position requested in
  // the PromiseRequest.
  optional Action action = 3;
}
// Represents a write request for a specific type of action. Note that
// we deliberately do not include the entire Action as it contains
// fields that are not relevant to a write request (e.g., promised,
// performed) and rather than ignore them we exclude them for safety.
message WriteRequest {
  // Proposal number of this request.
  required uint64 proposal = 1;

  // Position that this write request is for.
  required uint64 position = 2;

  // NOTE(review): presumably mirrors Action.learned (the action has
  // reached consensus) -- confirm against the coordinator code.
  optional bool learned = 3;

  // The type of action being written.
  required Action.Type type = 4;

  // Per-type payloads. NOTE(review): presumably only the one matching
  // 'type' is expected to be set -- confirm against the replica code.
  optional Action.Nop nop = 5;
  optional Action.Append append = 6;
  optional Action.Truncate truncate = 7;
}
// Represents a write response corresponding to a write request. The
// kind of the response is given by the "type" field:
//
// 1. IGNORED: The recipient of the write request was not in VOTING
// state, so it ignored the request.
// 2. REJECT: The recipient of the proposal has already promised a
// proposer with a higher proposal number. This is called a
// "NACK" in the code.
// 3. ACCEPT: The write request was accepted.
//
// Before 0.26, we only sent responses for cases 2 and 3, so the
// 'okay' field was used to distinguish these responses. For backward
// compatibility, we continue setting 'okay' to false for both cases 1
// and 2; this means old masters will treat IGNORED as a NACK: this
// might result in demoting the current coordinator, but that should
// be tolerable. TODO(neilc): Remove 'okay' in 0.27.
//
// The 'position' should always correspond to the position set in the
// request. The 'proposal' is either the same proposal number set in
// the request (cases 1 and 3), or the higher proposal number
// this position has been promised to (case 2).
message WriteResponse {
  // The kind of response.
  enum Type {
    ACCEPT = 1;
    REJECT = 2;

    // NOTE: Change in tense here is to avoid name collisions with
    // Windows headers.
    IGNORED = 3;
  }

  // DEPRECATED
  // Still set for backward compatibility; it is false for both IGNORED
  // and REJECT responses.
  required bool okay = 1;

  // The kind of this response.
  optional Type type = 4;

  // For IGNORED and ACCEPT, the same proposal number set in the
  // request; for REJECT, the higher proposal number this position has
  // been promised to.
  required uint64 proposal = 2;

  // Should always correspond to the position set in the request.
  required uint64 position = 3;
}
// Represents a "learned" event, that is, when a particular action has
// been agreed upon (reached consensus).
message LearnedMessage {
  // The action that has been agreed upon (reached consensus).
  required Action action = 1;
}
// Represents a recover request. A recover request is used to initiate
// the recovery (by broadcasting it).
message RecoverRequest {
  // This message declares no fields.
}
// When a replica receives a RecoverRequest, it will reply with its
// current status, and the begin and the end of its current log.
message RecoverResponse {
  // Current status of the replying replica.
  required Metadata.Status status = 1;

  // The begin of the replica's current log.
  optional uint64 begin = 2;

  // The end of the replica's current log.
  optional uint64 end = 3;
}