/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

option java_package = "org.apache.hadoop.hdfs.server.namenode";
option java_outer_classname = "FsImageProto";

package hadoop.hdfs.fsimage;

import "hdfs.proto";
import "acl.proto";
import "xattr.proto";

/**
 * This file defines the on-disk layout of the file system image. The
 * layout is defined by the following EBNF grammar, in which angle
 * brackets mark protobuf definitions. (e.g., <FileSummary>)
 *
 * FILE := MAGIC SECTION* <FileSummary> FileSummaryLength
 * MAGIC := 'HDFSIMG1'
 * SECTION := <NameSystemSection> | ...
 * FileSummaryLength := 4 byte int
 *
 * Some notes:
 *
 * The codec field in FileSummary describes the compression codec used
 * for all sections. The fileheader is always uncompressed.
 *
 * All protobuf messages are serialized in delimited form, which means
 * that there will always be an integer that indicates the size of the
 * protobuf message.
 *
 */

/**
 * Summary record of the image file. In the grammar above it is the
 * <FileSummary> element that follows the sections. It carries the format
 * versions, the compression codec and an index of the sections.
 */
message FileSummary {
  // The version of the above EBNF grammar.
  required uint32 ondiskVersion = 1;
  // layoutVersion describes which features are available in the
  // FSImage.
  required uint32 layoutVersion = 2;
  // Compression codec used for all sections (see the notes in the file
  // comment above). NOTE(review): presumably left unset when the sections
  // are not compressed -- confirm against the image writer.
  optional string codec         = 3;
  // index for each section
  message Section {
    // Name of the section. NOTE(review): presumably one of the names given
    // in the per-section comments of this file (e.g. NS_INFO, INODE) --
    // confirm against the image writer.
    optional string name = 1;
    // Length of the section. NOTE(review): presumably in bytes -- confirm.
    optional uint64 length = 2;
    // Offset of the section. NOTE(review): presumably a byte offset from
    // the start of the file -- confirm.
    optional uint64 offset = 3;
  }
  // One index entry per section of the file.
  repeated Section sections = 4;
}

/**
 * Namesystem-wide identifiers and counters stored in the image.
 *
 * Name: NS_INFO
 */
message NameSystemSection {
  // Id of the namespace.
  optional uint32 namespaceId = 1;
  // NOTE(review): genstampV1, genstampV2 and genstampV1Limit look like the
  // block generation-stamp counters of two stamp schemes plus an upper
  // bound for the V1 scheme -- confirm against the code that loads this
  // section.
  optional uint64 genstampV1 = 2;
  optional uint64 genstampV2 = 3;
  optional uint64 genstampV1Limit = 4;
  // Last block id that was allocated.
  optional uint64 lastAllocatedBlockId = 5;
  // NOTE(review): presumably the id of the last transaction reflected in
  // this image -- confirm.
  optional uint64 transactionId = 6;
  // Start time of a rolling upgrade. NOTE(review): unit and epoch are not
  // stated in this file -- confirm.
  optional uint64 rollingUpgradeStartTime = 7;
  // Last block id that was allocated for a striped block.
  optional uint64 lastAllocatedStripedBlockId = 8;
}

/**
 * Header of the inode section, together with the message types used to
 * describe individual inodes (files, directories and symlinks).
 *
 * Permission is serialized as a 64-bit long. [0:24):[24:48):[48:64) (in Big Endian).
 * The first and the second parts are the string ids of the user and
 * group name, and the last 16 bits are the permission bits.
 *
 * Name: INODE
 */
message INodeSection {
  /**
   * under-construction feature for INodeFile
   */
  message FileUnderConstructionFeature {
    // Name of the client. NOTE(review): presumably the client that holds
    // the file open for write -- confirm.
    optional string clientName = 1;
    // Machine of that client.
    optional string clientMachine = 2;
  }

  message AclFeatureProto {
    /**
     * An ACL entry is represented by a 32-bit integer in Big Endian
     * format. The bits can be divided in five segments:
     * [0:2) || [2:26) || [26:27) || [27:29) || [29:32)
     *
     * [0:2) -- reserved for future uses.
     * [2:26) -- the name of the entry, which is an ID that points to a
     * string in the StringTableSection.
     * [26:27) -- the scope of the entry (AclEntryScopeProto)
     * [27:29) -- the type of the entry (AclEntryTypeProto)
     * [29:32) -- the permission of the entry (FsActionProto)
     *
     * Note: field number 1 is not used by this message.
     */
    repeated fixed32 entries = 2 [packed = true];
  }
  
  message XAttrCompactProto {
    /**
     * The name of an XAttr is represented by a 32-bit integer whose bits
     * are divided as follows:
     *
     * [0:2) -- the namespace of XAttr (XAttrNamespaceProto)
     * [2:26) -- the name of the entry, which is an ID that points to a
     * string in the StringTableSection.
     * [26:27) -- namespace extension. Originally there were only 4 namespaces
     * so only 2 bits were needed. At that time, this bit was reserved. When a
     * 5th namespace was created (raw) this bit became used as a 3rd namespace
     * bit.
     * [27:32) -- reserved for future uses.
     */
    required fixed32 name = 1;
    // Value of the extended attribute, as raw bytes.
    optional bytes value = 2;
  }
  
  // The extended attributes attached to an inode.
  message XAttrFeatureProto {
    repeated XAttrCompactProto xAttrs = 1;
  }
  
  // Attributes of a file inode.
  message INodeFile {
    optional uint32 replication = 1;
    // NOTE(review): unit and epoch of the two timestamps are not stated in
    // this file -- confirm.
    optional uint64 modificationTime = 2;
    optional uint64 accessTime = 3;
    optional uint64 preferredBlockSize = 4;
    // Packed user / group / permission bits; the layout is described in the
    // comment on INodeSection.
    optional fixed64 permission = 5;
    // Blocks of the file. BlockProto is not defined in this file.
    // NOTE(review): presumably it comes from the imported hdfs.proto --
    // confirm.
    repeated BlockProto blocks = 6;
    // Under-construction feature. NOTE(review): presumably set only while
    // the file is under construction -- confirm.
    optional FileUnderConstructionFeature fileUC = 7;
    optional AclFeatureProto acl = 8;
    optional XAttrFeatureProto xAttrs = 9;
    optional uint32 storagePolicyID = 10;
    // Whether the file is striped.
    optional bool isStriped = 11;
  }

  // Quota for a single storage type.
  message QuotaByStorageTypeEntryProto {
    // StorageTypeProto is not defined in this file. NOTE(review):
    // presumably it comes from the imported hdfs.proto -- confirm.
    required StorageTypeProto storageType = 1;
    required uint64 quota = 2;
  }

  // The per-storage-type quotas of a directory.
  message QuotaByStorageTypeFeatureProto {
    repeated QuotaByStorageTypeEntryProto quotas = 1;
  }

  // Attributes of a directory inode. The children of a directory are not
  // stored here; INodeDirectorySection records them.
  message INodeDirectory {
    optional uint64 modificationTime = 1;
    // namespace quota
    optional uint64 nsQuota = 2;
    // diskspace quota
    optional uint64 dsQuota = 3;
    // Packed user / group / permission bits; the layout is described in the
    // comment on INodeSection.
    optional fixed64 permission = 4;
    optional AclFeatureProto acl = 5;
    optional XAttrFeatureProto xAttrs = 6;
    optional QuotaByStorageTypeFeatureProto typeQuotas = 7;
  }

  // Attributes of a symlink inode.
  message INodeSymlink {
    // Packed user / group / permission bits; the layout is described in the
    // comment on INodeSection.
    optional fixed64 permission = 1;
    // Target of the link, as raw bytes.
    optional bytes target = 2;
    optional uint64 modificationTime = 3;
    optional uint64 accessTime = 4;
  }

  // A single inode: its type, id and name plus the type-specific attributes.
  message INode {
    enum Type {
      FILE = 1;
      DIRECTORY = 2;
      SYMLINK = 3;
    };
    required Type type = 1;
    required uint64 id = 2;
    // Name of the inode, as raw bytes.
    optional bytes name = 3;

    // Type-specific attributes. NOTE(review): presumably only the field
    // matching `type` is set; the schema itself does not enforce this.
    optional INodeFile file = 4;
    optional INodeDirectory directory = 5;
    optional INodeSymlink symlink = 6;
  }

  optional uint64 lastInodeId = 1;
  // Number of inodes. NOTE(review): presumably the number of INode messages
  // that follow this header in the section -- confirm.
  optional uint64 numInodes = 2;
  // repeated INodes..
}

/**
 * This section records information about under-construction files for
 * reconstructing the lease map.
 * NAME: FILES_UNDERCONSTRUCTION
 */
message FilesUnderConstructionSection {
  // One file that is under construction.
  message FileUnderConstructionEntry {
    // Id of the inode of the file.
    optional uint64 inodeId = 1;
    // Full path of the file.
    optional string fullPath = 2;
  }
  // The section message itself has no fields; the entries are stored as
  // separate messages.
  // repeated FileUnderConstructionEntry...
}

/**
 * This section records the children of each directory
 * NAME: INODE_DIR
 */
message INodeDirectorySection {
  /**
   * A single DirEntry needs to fit in the default PB max message size of
   * 64MB. Please be careful when adding more fields to a DirEntry!
   */
  message DirEntry {
    // NOTE(review): presumably the inode id of the directory whose children
    // are listed in this entry -- confirm.
    optional uint64 parent = 1;
    // children that are not reference nodes
    repeated uint64 children = 2 [packed = true];
    // children that are reference nodes, each element is a reference node id
    // NOTE(review): presumably these ids refer to entries of
    // INodeReferenceSection -- confirm.
    repeated uint32 refChildren = 3 [packed = true];
  }
  // repeated DirEntry, ended at the boundary of the section.
}

/**
 * This section records inode references.
 * NOTE(review): unlike most other sections, no section name is documented
 * here -- presumably INODE_REFERENCE; confirm against the image writer.
 */
message INodeReferenceSection {
  // A single reference to an inode.
  message INodeReference {
    // id of the referred inode
    optional uint64 referredId = 1;
    // local name recorded in WithName
    optional bytes name = 2;
    // recorded in DstReference
    optional uint32 dstSnapshotId = 3;
    // recorded in WithName
    optional uint32 lastSnapshotId = 4;
  }
  // The section message itself has no fields; the references are stored as
  // separate messages.
  // repeated INodeReference...
}

/**
 * This section records the information about snapshot
 * NAME: SNAPSHOT
 */
message SnapshotSection {
  // A single snapshot.
  message Snapshot {
    optional uint32 snapshotId = 1;
    // Snapshot root
    optional INodeSection.INode root = 2;
  }

  // NOTE(review): presumably the counter from which snapshot ids are
  // allocated -- confirm.
  optional uint32 snapshotCounter = 1;
  // NOTE(review): presumably the inode ids of the snapshottable
  // directories -- confirm.
  repeated uint64 snapshottableDir = 2 [packed = true];
  // total number of snapshots
  optional uint32 numSnapshots = 3;
  // The Snapshot messages are not fields of this message; they are stored
  // separately.
  // repeated Snapshot...
}

/**
 * This section records information about snapshot diffs
 * NAME: SNAPSHOT_DIFF
 */
message SnapshotDiffSection {
  // One entry of the created list of a DirectoryDiff.
  message CreatedListEntry {
    // Name of the entry, as raw bytes.
    optional bytes name = 1;
  }

  // Diff of a directory for one snapshot.
  message DirectoryDiff {
    optional uint32 snapshotId = 1;
    optional uint32 childrenSize = 2;
    optional bool isSnapshotRoot = 3;
    optional bytes name = 4;
    // Copy of the directory attributes. NOTE(review): presumably the
    // attributes as of the snapshot -- confirm.
    optional INodeSection.INodeDirectory snapshotCopy = 5;
    // Number of CreatedListEntry messages that belong to this diff.
    optional uint32 createdListSize = 6;
    repeated uint64 deletedINode = 7 [packed = true]; // id of deleted inodes
    // id of reference nodes in the deleted list
    repeated uint32 deletedINodeRef = 8 [packed = true];
    // repeated CreatedListEntry (size is specified by createdListSize)
  }

  // Diff of a file for one snapshot.
  message FileDiff {
    optional uint32 snapshotId = 1;
    optional uint64 fileSize = 2;
    optional bytes name = 3;
    // Copy of the file attributes. NOTE(review): presumably the attributes
    // as of the snapshot -- confirm.
    optional INodeSection.INodeFile snapshotCopy = 4;
    // BlockProto is not defined in this file. NOTE(review): presumably it
    // comes from the imported hdfs.proto -- confirm.
    repeated BlockProto blocks = 5;
  }

  // Header for the diffs of one inode.
  message DiffEntry {
    enum Type {
      FILEDIFF = 1;
      DIRECTORYDIFF = 2;
    }
    // Selects whether the diffs of this entry are FileDiff or DirectoryDiff
    // messages.
    required Type type = 1;
    optional uint64 inodeId = 2;
    // NOTE(review): presumably the number of diff messages that belong to
    // this entry -- confirm.
    optional uint32 numOfDiff = 3;

    // repeated DirectoryDiff or FileDiff
  }

  // repeated DiffEntry
}

/**
 * This section maps string to id
 * NAME: STRING_TABLE
 *
 * The comments in INodeSection describe ACL entry names and XAttr names as
 * ids that point to strings in this table.
 */
message StringTableSection {
  // One (id, string) pair of the table.
  message Entry {
    optional uint32 id = 1;
    optional string str = 2;
  }
  // Number of entries. NOTE(review): presumably the number of Entry
  // messages stored in the section -- confirm.
  optional uint32 numEntry = 1;
  // repeated Entry
}

/**
 * This section records delegation keys and persisted tokens.
 * NOTE(review): no section name is documented here -- presumably
 * SECRET_MANAGER; confirm against the image writer.
 */
message SecretManagerSection {
  // A single delegation key.
  message DelegationKey {
    optional uint32 id         = 1;
    // NOTE(review): unit and epoch of the expiry date are not stated in
    // this file -- confirm.
    optional uint64 expiryDate = 2;
    // Key material, as raw bytes.
    optional bytes  key        = 3;
  }
  // A single persisted token.
  message PersistToken {
    optional uint32 version        = 1;
    optional string owner          = 2;
    optional string renewer        = 3;
    optional string realUser       = 4;
    // NOTE(review): unit and epoch of the three dates are not stated in
    // this file -- confirm.
    optional uint64 issueDate      = 5;
    optional uint64 maxDate        = 6;
    optional uint32 sequenceNumber = 7;
    // NOTE(review): presumably the id of the DelegationKey used for this
    // token -- confirm.
    optional uint32 masterKeyId    = 8;
    optional uint64 expiryDate     = 9;
  }
  optional uint32 currentId = 1;
  optional uint32 tokenSequenceNumber = 2;
  // NOTE(review): presumably the numbers of DelegationKey and PersistToken
  // messages stored in the section -- confirm.
  optional uint32 numKeys = 3;
  optional uint32 numTokens = 4;
  // repeated DelegationKey keys
  // repeated PersistToken tokens
}

/**
 * This section records cache pools and cache directives.
 * NOTE(review): no section name is documented here -- presumably
 * CACHE_MANAGER; confirm against the image writer.
 *
 * All three header fields are declared required.
 */
message CacheManagerSection {
  // NOTE(review): presumably the id to assign to the next cache
  // directive -- confirm.
  required uint64 nextDirectiveId = 1;
  // NOTE(review): presumably the numbers of pool and directive messages
  // stored in the section -- confirm.
  required uint32 numPools        = 2;
  required uint32 numDirectives   = 3;
  // CachePoolInfoProto and CacheDirectiveInfoProto are not defined in this
  // file.
  // repeated CachePoolInfoProto pools
  // repeated CacheDirectiveInfoProto directives
}
