| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package mesos.internal.log; |
| |
| |
| // Represents a "promise" that a replica has made; 'proposal' is the |
| // promised proposal number. A promise is *implicitly* valid for _all_ |
| // future actions that get performed on the replicated log (provided the |
| // action comes from the same proposer), until a new promise is made to a |
| // proposer with a higher proposal number. Each replica writes every promise |
| // it makes as a log record so that it can recover it after a failure. |
| // TODO(benh): Does the promise actually need to be written to stable |
| // storage? Can we get away with looking at the last written action |
| // and using its promised value? In this case, what happens if we |
| // make a promise but don't receive an action from that coordinator? |
| message Promise { |
| required uint64 proposal = 1; |
| } |
| |
| |
| // Represents an "action" performed on the log. Each action has an |
| // associated 'position' in the log. In addition, each action (i.e., |
| // position) will have been 'promised' to a specific proposer |
| // (implicitly or explicitly) and may have been 'performed' from a |
| // specific proposer. An action may also be 'learned' to have reached |
| // consensus. There are three types of possible actions that can be |
| // performed on the log: nop (no action), append, and truncate. |
| message Action { |
| required uint64 position = 1; |
| required uint64 promised = 2; |
| optional uint64 performed = 3; |
| optional bool learned = 4; |
| |
| enum Type { |
| NOP = 1; |
| APPEND = 2; |
| TRUNCATE = 3; |
| } |
| |
| message Nop {} |
| |
| message Append { |
| required bytes bytes = 1; |
| optional bytes cksum = 2; |
| } |
| |
| message Truncate { |
| required uint64 to = 1; // Covers all positions before 'to'; 'to' itself is excluded. |
| } |
| |
| optional Type type = 5; // Set iff 'performed' is set. |
| optional Nop nop = 6; |
| optional Append append = 7; |
| optional Truncate truncate = 8; |
| } |
| |
| |
| // The metadata of a replica. It has to be persisted on the disk. We |
| // store the current 'status' of the replica as well as the implicit |
| // promise ('promised') that a replica has made. This message is |
| // intended to replace the old Promise message to support catch-up. |
| message Metadata { |
| enum Status { |
| VOTING = 1; // Normal voting member in Paxos group. |
| RECOVERING = 2; // In the process of catching up. |
| STARTING = 3; // The log has been initialized. |
| EMPTY = 4; // The log is empty and is not initialized. |
| } |
| |
| required Status status = 1 [default = EMPTY]; |
| required uint64 promised = 2 [default = 0]; |
| } |
| |
| |
| // Represents a log record written to the local filesystem by a |
| // replica. A log record may store a promise (DEPRECATED), an action |
| // or metadata (defined above); 'type' tags which of these it is. |
| message Record { |
| enum Type { |
| PROMISE = 1; // DEPRECATED! |
| ACTION = 2; |
| METADATA = 3; |
| } |
| |
| required Type type = 1; |
| optional Promise promise = 2; // DEPRECATED! |
| optional Action action = 3; |
| optional Metadata metadata = 4; |
| } |
| |
| |
| //////////////////////////////////////////////////// |
| // Replicated log request/responses and messages. // |
| //////////////////////////////////////////////////// |
| |
| |
| // Represents a "promise" request from a proposer with the specified |
| // 'proposal' to a replica. If the proposer is a coordinator, most |
| // such requests will occur after a coordinator has failed and a new |
| // coordinator is elected. In such a case, the position that the |
| // coordinator is asking the replica to promise is implicitly *all* |
| // positions that the replica has made no promises (thus the 'position' |
| // field is not used). In other instances, however, a proposer |
| // might be explicitly trying to request that a replica promise a |
| // specific position in the log (such as when trying to fill holes |
| // discovered during a client read), and then the 'position' field |
| // will be present. |
| message PromiseRequest { |
| required uint64 proposal = 1; |
| optional uint64 position = 2; |
| } |
| |
| // Represents a promise response corresponding to a promise request. The |
| // kind is given by 'type' (case numbers below are not the enum values): |
| // |
| // 1. IGNORED: The recipient of the promise request was not in VOTING |
| // state, so it ignored the request. |
| // 2. REJECT: The recipient of the proposal has already promised a |
| // proposer with a higher proposal number. This is called a |
| // "NACK" in the code. |
| // 3. ACCEPT: The promise request was accepted. |
| // |
| // Before 0.26, we only sent responses for cases 2 and 3, so the |
| // 'okay' field was used to distinguish these responses. For backward |
| // compatibility, we continue setting 'okay' to false for both cases 1 |
| // and 2; this means old masters will treat IGNORED as a NACK: this |
| // might result in demoting the current coordinator, but that should |
| // be tolerable. TODO(neilc): Remove 'okay' in 0.27. |
| // |
| // The 'proposal' is either the aforementioned higher proposal number |
| // (for case 2), or the corresponding request's proposal number (for |
| // cases 1 and 3). The replica either sends back the highest position |
| // it has recorded in the log (using the 'position' field) or the |
| // specific action (if any) it has at the position requested in |
| // PromiseRequest (using the 'action' field). |
| message PromiseResponse { |
| enum Type { |
| ACCEPT = 1; |
| REJECT = 2; |
| // NOTE: Change in tense here is to avoid name collisions with |
| // Windows headers. |
| IGNORED = 3; |
| } |
| |
| required bool okay = 1; // DEPRECATED: use 'type'; false for IGNORED and REJECT. |
| optional Type type = 5; |
| required uint64 proposal = 2; |
| optional uint64 position = 4; |
| optional Action action = 3; |
| } |
| |
| |
| // Represents a write request for a specific type of action. Note that |
| // we deliberately do not include the entire Action as it contains |
| // fields that are not relevant to a write request (e.g., 'promised', |
| // 'performed') and rather than ignore them we exclude them for safety. |
| message WriteRequest { |
| required uint64 proposal = 1; |
| required uint64 position = 2; |
| optional bool learned = 3; |
| required Action.Type type = 4; |
| optional Action.Nop nop = 5; |
| optional Action.Append append = 6; |
| optional Action.Truncate truncate = 7; |
| } |
| |
| |
| // Represents a write response corresponding to a write request. The |
| // kind is given by 'type' (case numbers below are not the enum values): |
| // |
| // 1. IGNORED: The recipient of the write request was not in VOTING |
| // state, so it ignored the request. |
| // 2. REJECT: The recipient of the proposal has already promised a |
| // proposer with a higher proposal number. This is called a |
| // "NACK" in the code. |
| // 3. ACCEPT: The write request was accepted. |
| // |
| // Before 0.26, we only sent responses for cases 2 and 3, so the |
| // 'okay' field was used to distinguish these responses. For backward |
| // compatibility, we continue setting 'okay' to false for both cases 1 |
| // and 2; this means old masters will treat IGNORED as a NACK: this |
| // might result in demoting the current coordinator, but that should |
| // be tolerable. TODO(neilc): Remove 'okay' in 0.27. |
| // |
| // The 'position' should always correspond to the position set in the |
| // request. The 'proposal' is either the same proposal number set in |
| // the request (cases 1 and 3), or the higher proposal number |
| // this position has been promised to (case 2). |
| message WriteResponse { |
| enum Type { |
| ACCEPT = 1; |
| REJECT = 2; |
| // NOTE: Change in tense here is to avoid name collisions with |
| // Windows headers. |
| IGNORED = 3; |
| } |
| |
| required bool okay = 1; // DEPRECATED: use 'type'; false for IGNORED and REJECT. |
| optional Type type = 4; |
| required uint64 proposal = 2; |
| required uint64 position = 3; |
| } |
| |
| |
| // Represents a "learned" event, that is, when a particular 'action' has |
| // been agreed upon (reached consensus). |
| message LearnedMessage { |
| required Action action = 1; |
| } |
| |
| |
| // Represents a recover request. A recover request is used to initiate |
| // the recovery (by broadcasting it). The message declares no fields. |
| message RecoverRequest {} |
| |
| |
| // When a replica receives a RecoverRequest, it will reply with its |
| // current 'status', and the 'begin' and the 'end' of its current log. |
| message RecoverResponse { |
| required Metadata.Status status = 1; |
| optional uint64 begin = 2; |
| optional uint64 end = 3; |
| } |