| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package mesos.internal.log; |
| |
| |
// A "promise" made by a replica. Until it is superseded, a promise is
// *implicitly* valid for _all_ future actions performed on the
// replicated log, provided those actions come from the same proposer.
// It is superseded when the replica makes a new promise to a proposer
// with a higher proposal number. Every promise a replica makes is
// written as a log record so that the replica can recover this
// information after a failure.
//
// TODO(benh): Does the promise actually need to be written to stable
// storage? Can we get away with looking at the last written action
// and using its promised value? In this case, what happens if we
// make a promise but don't receive an action from that coordinator?
message Promise {
  // Proposal number of the proposer that this promise was made to.
  required uint64 proposal = 1;
}
| |
| |
// An "action" performed on the log. Every action is associated with a
// position in the log. Each action (i.e., position) will have been
// "promised" to a specific proposer, either implicitly or explicitly,
// and may have been "performed" from a specific proposer. An action
// may additionally be "learned", meaning it has reached consensus.
// Three types of action can be performed on the log: nop (no action),
// append, and truncate.
message Action {
  // Position in the log that this action is associated with.
  required uint64 position = 1;

  // Identifies the proposer this position has been promised to
  // (implicitly or explicitly).
  // NOTE(review): presumably a proposal number -- confirm.
  required uint64 promised = 2;

  // Present when the action has been performed from a specific
  // proposer.
  // NOTE(review): presumably that proposer's proposal number --
  // confirm.
  optional uint64 performed = 3;

  // Whether the action has been learned to have reached consensus.
  optional bool learned = 4;

  // The types of action that can be performed on the log.
  enum Type {
    NOP = 1;
    APPEND = 2;
    TRUNCATE = 3;
  }

  // Message for the nop (no action) type; it declares no fields.
  message Nop {}

  // Message for the append type.
  message Append {
    // The appended data.
    required bytes bytes = 1;

    // NOTE(review): presumably a checksum over 'bytes' -- confirm.
    optional bytes cksum = 2;
  }

  // Message for the truncate type.
  message Truncate {
    // All positions before and exclusive of 'to'.
    required uint64 to = 1;
  }

  // Set iff performed is set.
  optional Type type = 5;

  // Type-specific messages.
  // NOTE(review): presumably only the one matching 'type' is set --
  // confirm against the code that writes actions.
  optional Nop nop = 6;
  optional Append append = 7;
  optional Truncate truncate = 8;
}
| |
| |
// Replica metadata, which has to be persisted on the disk. It holds
// the current status of the replica together with the implicit
// promise that the replica has made. This message is intended to
// replace the old Promise message in order to support catch-up.
message Metadata {
  // The states a replica can be in.
  enum Status {
    // Normal voting member in Paxos group.
    VOTING = 1;

    // In the process of catching up.
    RECOVERING = 2;

    // The log has been initialized.
    STARTING = 3;

    // The log is empty and is not initialized.
    EMPTY = 4;
  }

  // Current status of the replica; the declared default is EMPTY.
  required Status status = 1 [default = EMPTY];

  // The implicit promise the replica has made; the declared default
  // is 0.
  required uint64 promised = 2 [default = 0];
}
| |
| |
// A log record that a replica writes to the local filesystem. A
// record may store a promise (DEPRECATED), an action, or metadata
// (see the Promise, Action and Metadata messages).
message Record {
  // The kinds of content a record may store.
  enum Type {
    // DEPRECATED!
    PROMISE = 1;
    ACTION = 2;
    METADATA = 3;
  }

  // Kind of content stored in this record.
  required Type type = 1;

  // DEPRECATED!
  optional Promise promise = 2;

  // The stored action, if any.
  optional Action action = 3;

  // The stored metadata, if any.
  optional Metadata metadata = 4;
}
| |
| |
| //////////////////////////////////////////////////// |
| // Replicated log request/responses and messages. // |
| //////////////////////////////////////////////////// |
| |
| |
// A "promise" request sent to a replica by a proposer with the
// specified 'proposal'. When the proposer is a coordinator, most such
// requests occur after a coordinator has failed and a new coordinator
// has been elected. In that case the coordinator is implicitly asking
// the replica to promise *all* positions for which the replica has
// made no promises, so the 'position' field is not used. In other
// instances, however, a proposer may explicitly request that a
// replica promise one specific position in the log (such as when
// trying to fill holes discovered during a client read), and then the
// 'position' field will be present.
message PromiseRequest {
  // Proposal number of the requesting proposer.
  required uint64 proposal = 1;

  // Present only when a specific log position is being requested;
  // absent for the implicit "all unpromised positions" request.
  optional uint64 position = 2;
}
| |
| |
// A "promise" response sent by a replica back to a proposer. The
// replica signals a NACK (because it has already promised a proposer
// with a higher proposal number) by setting the 'okay' field to
// false. On a NACK, 'proposal' is that higher proposal number; on an
// ACK it is the proposal number of the corresponding request. The
// replica sends back either the highest position it has recorded in
// the log (in the 'position' field) or the specific action, if any,
// that it has at the position requested in PromiseRequest (in the
// 'action' field).
message PromiseResponse {
  // False means NACK; true means ACK.
  required bool okay = 1;

  // The higher proposal number on a NACK, otherwise the request's
  // proposal number.
  required uint64 proposal = 2;

  // Highest position the replica has recorded in the log.
  optional uint64 position = 4;

  // The action, if any, the replica has at the requested position.
  optional Action action = 3;
}
| |
| |
// A write request for a specific type of action. The entire Action is
// deliberately not included here: it contains fields that are not
// relevant to a write request (e.g., promised, performed), and rather
// than ignoring those fields we exclude them for safety.
message WriteRequest {
  // Proposal number of the proposer issuing the write.
  required uint64 proposal = 1;

  // Log position the write applies to.
  required uint64 position = 2;

  // NOTE(review): presumably mirrors Action.learned -- confirm.
  optional bool learned = 3;

  // Type of action being written.
  required Action.Type type = 4;

  // Type-specific messages, reusing the definitions nested in Action.
  // NOTE(review): presumably only the one matching 'type' is set --
  // confirm against the sender.
  optional Action.Nop nop = 5;
  optional Action.Append append = 6;
  optional Action.Truncate truncate = 7;
}
| |
| |
// A write response corresponding to a write request. The replica
// signals a NACK (because it has promised a proposer with a higher
// proposal number) by setting the 'okay' field to false. If the
// proposer is a coordinator, a NACK means it has been demoted.
// 'position' should always correspond to the position set in the
// request. 'proposal' is the same proposal number set in the request
// in the case of an ACK, or the higher proposal number this position
// has been promised to in the case of a NACK.
message WriteResponse {
  // False means NACK; true means ACK.
  required bool okay = 1;

  // The request's proposal number on an ACK, or the higher proposal
  // number the position has been promised to on a NACK.
  required uint64 proposal = 2;

  // Should always match the position set in the request.
  required uint64 position = 3;
}
| |
| |
// A "learned" event: it signals that a particular action has been
// agreed upon, i.e., has reached consensus.
message LearnedMessage {
  // The action that has been agreed upon.
  required Action action = 1;
}
| |
| |
// A recover request. Broadcasting a recover request is how the
// recovery is initiated. The message declares no fields.
message RecoverRequest {}
| |
| |
// The reply a replica sends when it receives a RecoverRequest: its
// current status, plus the begin and the end of its current log.
message RecoverResponse {
  // Current status of the replying replica.
  required Metadata.Status status = 1;

  // Begin of the replica's current log.
  optional uint64 begin = 2;

  // End of the replica's current log.
  optional uint64 end = 3;
}