/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.protocol.HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED;
import static org.apache.hadoop.hdfs.protocol.BlockType.CONTIGUOUS;
import static org.apache.hadoop.hdfs.protocol.BlockType.STRIPED;
import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID;
import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.NO_SNAPSHOT_ID;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
import org.apache.hadoop.hdfs.protocol.BlockType;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiff;
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileWithSnapshotFeature;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.DiffList;
import org.apache.hadoop.hdfs.util.LongBitFormat;
import org.apache.hadoop.util.StringUtils;
import static org.apache.hadoop.io.erasurecode.ErasureCodeConstants.REPLICATION_POLICY_ID;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
/** I-node for closed file. */
@InterfaceAudience.Private
public class INodeFile extends INodeWithAdditionalFields
implements INodeFileAttributes, BlockCollection {
/**
* Erasure coded striped blocks have a replication factor of 1.
*/
public static final short DEFAULT_REPL_FOR_STRIPED_BLOCKS = 1;
/** The same as valueOf(inode, path, false). */
public static INodeFile valueOf(INode inode, String path
) throws FileNotFoundException {
return valueOf(inode, path, false);
}
/** Cast INode to INodeFile. */
public static INodeFile valueOf(INode inode, String path, boolean acceptNull)
throws FileNotFoundException {
if (inode == null) {
if (acceptNull) {
return null;
} else {
throw new FileNotFoundException("File does not exist: " + path);
}
}
if (!inode.isFile()) {
throw new FileNotFoundException("Path is not a file: " + path);
}
return inode.asFile();
}
/**
* Bit format:
* [4-bit storagePolicyID][12-bit BLOCK_LAYOUT_AND_REDUNDANCY]
* [48-bit preferredBlockSize]
*
* BLOCK_LAYOUT_AND_REDUNDANCY contains 12 bits and describes the layout and
* redundancy of a block. The highest bit determines whether the block is
* replicated or erasure coded. For replicated blocks, the remaining 11 bits
* store the replication factor. For erasure coded blocks, they store the EC
* policy ID; in the future, these 11 bits may be further divided to store
* both the EC policy ID and a replication factor for erasure coded blocks.
* The layout of this section is shown below.
*
* Another possible extension is support for additional block types, in which
* case the 'Replica or EC' bit may be extended into the 11-bit field.
*
* +---------------+-------------------------------+
* | 1 bit | 11 bit |
* +---------------+-------------------------------+
* | Replica or EC |Replica factor or EC policy ID |
* +---------------+-------------------------------+
*
* BLOCK_LAYOUT_AND_REDUNDANCY format for replicated block:
* 0 [11-bit replication]
*
* BLOCK_LAYOUT_AND_REDUNDANCY format for striped block:
* 1 [11-bit ErasureCodingPolicy ID]
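*
* Worked example (illustrative, not normative): a contiguous file with
* replication 3, the default storage policy (0) and a 256 MB preferred
* block size encodes as header = (0x003L << 48) | 0x10000000, so that
* getReplication(header) == 3, isStriped(header) == false and
* getPreferredBlockSize(header) == 256 MB.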
*/
enum HeaderFormat {
PREFERRED_BLOCK_SIZE(null, 48, 1),
BLOCK_LAYOUT_AND_REDUNDANCY(PREFERRED_BLOCK_SIZE.BITS,
HeaderFormat.LAYOUT_BIT_WIDTH + 11, 0),
STORAGE_POLICY_ID(BLOCK_LAYOUT_AND_REDUNDANCY.BITS,
BlockStoragePolicySuite.ID_BIT_LENGTH, 0);
private final LongBitFormat BITS;
/**
* Number of bits used to encode the block layout type.
* The type is either replicated (contiguous) or erasure coded.
*/
private static final int LAYOUT_BIT_WIDTH = 1;
private static final int MAX_REDUNDANCY = (1 << 11) - 1;
HeaderFormat(LongBitFormat previous, int length, long min) {
BITS = new LongBitFormat(name(), previous, length, min);
}
static short getReplication(long header) {
if (isStriped(header)) {
return DEFAULT_REPL_FOR_STRIPED_BLOCKS;
} else {
long layoutRedundancy =
BLOCK_LAYOUT_AND_REDUNDANCY.BITS.retrieve(header);
return (short) (layoutRedundancy & MAX_REDUNDANCY);
}
}
static byte getECPolicyID(long header) {
long layoutRedundancy = BLOCK_LAYOUT_AND_REDUNDANCY.BITS.retrieve(header);
return (byte) (layoutRedundancy & MAX_REDUNDANCY);
}
static long getPreferredBlockSize(long header) {
return PREFERRED_BLOCK_SIZE.BITS.retrieve(header);
}
static byte getStoragePolicyID(long header) {
return (byte)STORAGE_POLICY_ID.BITS.retrieve(header);
}
static byte getBlockLayoutPolicy(long header) {
return (byte)BLOCK_LAYOUT_AND_REDUNDANCY.BITS.retrieve(header);
}
// Union of all the block type masks. Currently there is only
// BLOCK_TYPE_MASK_STRIPED
static final long BLOCK_TYPE_MASK = 1 << 11;
// Mask to determine if the block type is striped.
static final long BLOCK_TYPE_MASK_STRIPED = 1 << 11;
static boolean isStriped(long header) {
return getBlockType(header) == STRIPED;
}
static BlockType getBlockType(long header) {
long layoutRedundancy = BLOCK_LAYOUT_AND_REDUNDANCY.BITS.retrieve(header);
long blockType = layoutRedundancy & BLOCK_TYPE_MASK;
if (blockType == BLOCK_TYPE_MASK_STRIPED) {
return STRIPED;
} else {
return CONTIGUOUS;
}
}
/**
* Construct block layout redundancy based on the given BlockType,
* replication factor and EC PolicyID.
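*
* For example (illustrative, assuming REPLICATION_POLICY_ID is 0 and an
* EC policy with ID 1, e.g. RS-6-3-1024k, is enabled):
* getBlockLayoutRedundancy(CONTIGUOUS, (short) 3, null) returns 0x003,
* while getBlockLayoutRedundancy(STRIPED, null, (byte) 1) returns 0x801,
* i.e. the striped bit (1 << 11) plus the policy ID.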
*/
static long getBlockLayoutRedundancy(BlockType blockType,
Short replication, Byte erasureCodingPolicyID) {
if (null == erasureCodingPolicyID) {
erasureCodingPolicyID = REPLICATION_POLICY_ID;
}
long layoutRedundancy = 0xFF & erasureCodingPolicyID;
switch (blockType) {
case STRIPED:
if (replication != null) {
throw new IllegalArgumentException(
"Illegal replication for STRIPED block type");
}
if (erasureCodingPolicyID == REPLICATION_POLICY_ID) {
throw new IllegalArgumentException(
"Illegal REPLICATION policy for STRIPED block type");
}
if (null == ErasureCodingPolicyManager.getInstance()
.getByID(erasureCodingPolicyID)) {
throw new IllegalArgumentException(String.format(
"Could not find EC policy with ID 0x%02x",
erasureCodingPolicyID));
}
// valid parameters for STRIPED
layoutRedundancy |= BLOCK_TYPE_MASK_STRIPED;
break;
case CONTIGUOUS:
if (erasureCodingPolicyID != REPLICATION_POLICY_ID) {
throw new IllegalArgumentException(String.format(
"Illegal EC policy 0x%02x for CONTIGUOUS block type",
erasureCodingPolicyID));
}
if (null == replication ||
replication < 0 || replication > MAX_REDUNDANCY) {
throw new IllegalArgumentException("Invalid replication value "
+ replication);
}
// valid parameters for CONTIGUOUS
layoutRedundancy |= replication;
break;
default:
throw new IllegalArgumentException("Unknown blockType: " + blockType);
}
return layoutRedundancy;
}
static long toLong(long preferredBlockSize, long layoutRedundancy,
byte storagePolicyID) {
long h = 0;
if (preferredBlockSize == 0) {
preferredBlockSize = PREFERRED_BLOCK_SIZE.BITS.getMin();
}
h = PREFERRED_BLOCK_SIZE.BITS.combine(preferredBlockSize, h);
h = BLOCK_LAYOUT_AND_REDUNDANCY.BITS.combine(layoutRedundancy, h);
h = STORAGE_POLICY_ID.BITS.combine(storagePolicyID, h);
return h;
}
}
private long header = 0L;
private BlockInfo[] blocks;
INodeFile(long id, byte[] name, PermissionStatus permissions, long mtime,
long atime, BlockInfo[] blklist, short replication,
long preferredBlockSize) {
this(id, name, permissions, mtime, atime, blklist, replication, null,
preferredBlockSize, (byte) 0, CONTIGUOUS);
}
INodeFile(long id, byte[] name, PermissionStatus permissions, long mtime,
long atime, BlockInfo[] blklist, Short replication, Byte ecPolicyID,
long preferredBlockSize, byte storagePolicyID, BlockType blockType) {
super(id, name, permissions, mtime, atime);
final long layoutRedundancy = HeaderFormat.getBlockLayoutRedundancy(
blockType, replication, ecPolicyID);
header = HeaderFormat.toLong(preferredBlockSize, layoutRedundancy,
storagePolicyID);
if (blklist != null && blklist.length > 0) {
for (BlockInfo b : blklist) {
Preconditions.checkArgument(b.getBlockType() == blockType);
}
}
setBlocks(blklist);
}
public INodeFile(INodeFile that) {
super(that);
this.header = that.header;
this.features = that.features;
setBlocks(that.blocks);
}
public INodeFile(INodeFile that, FileDiffList diffs) {
this(that);
Preconditions.checkArgument(!that.isWithSnapshot());
this.addSnapshotFeature(diffs);
}
/** @return true unconditionally. */
@Override
public final boolean isFile() {
return true;
}
/** @return this object. */
@Override
public final INodeFile asFile() {
return this;
}
@Override
public boolean metadataEquals(INodeFileAttributes other) {
return other != null
&& getHeaderLong()== other.getHeaderLong()
&& getPermissionLong() == other.getPermissionLong()
&& getAclFeature() == other.getAclFeature()
&& getXAttrFeature() == other.getXAttrFeature();
}
/* Start of Under-Construction Feature */
/**
* If the inode contains a {@link FileUnderConstructionFeature}, return it;
* otherwise, return null.
*/
public final FileUnderConstructionFeature getFileUnderConstructionFeature() {
return getFeature(FileUnderConstructionFeature.class);
}
/** Is this file under construction? */
@Override // BlockCollection
public boolean isUnderConstruction() {
return getFileUnderConstructionFeature() != null;
}
INodeFile toUnderConstruction(String clientName, String clientMachine) {
Preconditions.checkState(!isUnderConstruction(),
"file is already under construction");
FileUnderConstructionFeature uc = new FileUnderConstructionFeature(
clientName, clientMachine);
addFeature(uc);
return this;
}
/**
* Convert the file to a complete file, i.e., to remove the Under-Construction
* feature.
*/
void toCompleteFile(long mtime, int numCommittedAllowed, short minReplication) {
final FileUnderConstructionFeature uc = getFileUnderConstructionFeature();
Preconditions.checkNotNull(uc, "File %s is not under construction", this);
assertAllBlocksComplete(numCommittedAllowed, minReplication);
removeFeature(uc);
setModificationTime(mtime);
}
/** Assert all blocks are complete. */
private void assertAllBlocksComplete(int numCommittedAllowed,
short minReplication) {
for (int i = 0; i < blocks.length; i++) {
final String err = checkBlockComplete(blocks, i, numCommittedAllowed,
minReplication);
if(err != null) {
throw new IllegalStateException(String.format("Unexpected block state: " +
"%s, file=%s (%s), blocks=%s (i=%s)", err, this,
getClass().getSimpleName(), Arrays.asList(blocks), i));
}
}
}
/**
* Check if the i-th block is COMPLETE;
* when the i-th block is the last block, it may be allowed to be COMMITTED.
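* For example, with numCommittedAllowed == 1, only the very last block may
* be COMMITTED (and only with more than minReplication expected locations);
* every earlier block, and any striped block, must be COMPLETE.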
*
* @return null if the block passes the check;
* otherwise, return an error message.
*/
static String checkBlockComplete(BlockInfo[] blocks, int i,
int numCommittedAllowed, short minReplication) {
final BlockInfo b = blocks[i];
final BlockUCState state = b.getBlockUCState();
if (state == BlockUCState.COMPLETE) {
return null;
}
if (b.isStriped() || i < blocks.length - numCommittedAllowed) {
return b + " is " + state + " but not COMPLETE";
}
if (state != BlockUCState.COMMITTED) {
return b + " is " + state + " but neither COMPLETE nor COMMITTED";
}
final int numExpectedLocations
= b.getUnderConstructionFeature().getNumExpectedLocations();
if (numExpectedLocations <= minReplication) {
return b + " is " + state + " but numExpectedLocations = "
+ numExpectedLocations + " <= minReplication = " + minReplication;
}
return null;
}
@Override // BlockCollection
public void setBlock(int index, BlockInfo blk) {
Preconditions.checkArgument(blk.isStriped() == this.isStriped());
this.blocks[index] = blk;
}
@Override // BlockCollection, the file should be under construction
public void convertLastBlockToUC(BlockInfo lastBlock,
DatanodeStorageInfo[] locations) throws IOException {
Preconditions.checkState(isUnderConstruction(),
"file is no longer under construction");
if (numBlocks() == 0) {
throw new IOException("Failed to set last block: File is empty.");
}
lastBlock.convertToBlockUnderConstruction(BlockUCState.UNDER_CONSTRUCTION,
locations);
}
void setLastBlock(BlockInfo blk) {
blk.setBlockCollectionId(this.getId());
setBlock(numBlocks() - 1, blk);
}
/**
* Remove a block from the block list. This block should be
* the last one on the list.
*/
BlockInfo removeLastBlock(Block oldblock) {
Preconditions.checkState(isUnderConstruction(),
"file is no longer under construction");
if (blocks.length == 0) {
return null;
}
int size_1 = blocks.length - 1;
if (!blocks[size_1].equals(oldblock)) {
return null;
}
BlockInfo lastBlock = blocks[size_1];
//copy to a new list
BlockInfo[] newlist = new BlockInfo[size_1];
System.arraycopy(blocks, 0, newlist, 0, size_1);
setBlocks(newlist);
lastBlock.delete();
return lastBlock;
}
/* End of Under-Construction Feature */
/* Start of Snapshot Feature */
public FileWithSnapshotFeature addSnapshotFeature(FileDiffList diffs) {
Preconditions.checkState(!isWithSnapshot(),
"File is already with snapshot");
FileWithSnapshotFeature sf = new FileWithSnapshotFeature(diffs);
this.addFeature(sf);
return sf;
}
/**
* If feature list contains a {@link FileWithSnapshotFeature}, return it;
* otherwise, return null.
*/
public final FileWithSnapshotFeature getFileWithSnapshotFeature() {
return getFeature(FileWithSnapshotFeature.class);
}
/** Does this file have the snapshot feature? */
public final boolean isWithSnapshot() {
return getFileWithSnapshotFeature() != null;
}
@Override
public String toDetailString() {
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
return super.toDetailString() + (sf == null ? "" : sf.getDetailedString());
}
@Override
public INodeFileAttributes getSnapshotINode(final int snapshotId) {
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (sf != null) {
return sf.getDiffs().getSnapshotINode(snapshotId, this);
} else {
return this;
}
}
@Override
public void recordModification(final int latestSnapshotId) {
recordModification(latestSnapshotId, false);
}
public void recordModification(final int latestSnapshotId, boolean withBlocks) {
if (isInLatestSnapshot(latestSnapshotId)
&& !shouldRecordInSrcSnapshot(latestSnapshotId)) {
// the file is in a snapshot; create a snapshot feature if it does not have one
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (sf == null) {
sf = addSnapshotFeature(null);
}
// record self in the diff list if necessary
sf.getDiffs().saveSelf2Snapshot(latestSnapshotId, this, null, withBlocks);
}
}
public FileDiffList getDiffs() {
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (sf != null) {
return sf.getDiffs();
}
return null;
}
/* End of Snapshot Feature */
/** @return the replication factor of the file. */
public final short getFileReplication(int snapshot) {
if (snapshot != CURRENT_STATE_ID) {
return getSnapshotINode(snapshot).getFileReplication();
}
return HeaderFormat.getReplication(header);
}
/**
* The same as getFileReplication(CURRENT_STATE_ID).
* For erasure coded files, this returns DEFAULT_REPL_FOR_STRIPED_BLOCKS.
*/
@Override // INodeFileAttributes
public final short getFileReplication() {
if (isStriped()) {
return DEFAULT_REPL_FOR_STRIPED_BLOCKS;
}
return getFileReplication(CURRENT_STATE_ID);
}
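// For striped files, the preferred "replication" is the full width of the
// block group, e.g. 6 + 3 = 9 under an RS-6-3 policy; for contiguous files
// it is the maximum replication across the current file and its snapshots.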
public short getPreferredBlockReplication() {
short max = getFileReplication(CURRENT_STATE_ID);
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (sf != null) {
short maxInSnapshot = sf.getMaxBlockRepInDiffs(null);
if (sf.isCurrentFileDeleted()) {
return maxInSnapshot;
}
max = maxInSnapshot > max ? maxInSnapshot : max;
}
if(!isStriped()){
return max;
}
ErasureCodingPolicy ecPolicy = ErasureCodingPolicyManager.getInstance()
.getByID(getErasureCodingPolicyID());
Preconditions.checkNotNull(ecPolicy, "Could not find EC policy with ID 0x"
+ StringUtils.byteToHexString(getErasureCodingPolicyID()));
return (short) (ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits());
}
/** Set the replication factor of this file. */
private void setFileReplication(short replication) {
long layoutRedundancy =
HeaderFormat.BLOCK_LAYOUT_AND_REDUNDANCY.BITS.retrieve(header);
layoutRedundancy = (layoutRedundancy &
~HeaderFormat.MAX_REDUNDANCY) | replication;
header = HeaderFormat.BLOCK_LAYOUT_AND_REDUNDANCY.BITS.
combine(layoutRedundancy, header);
}
/** Set the replication factor of this file. */
public final INodeFile setFileReplication(short replication,
int latestSnapshotId) throws QuotaExceededException {
recordModification(latestSnapshotId);
setFileReplication(replication);
return this;
}
/** @return preferred block size (in bytes) of the file. */
@Override
public long getPreferredBlockSize() {
return HeaderFormat.getPreferredBlockSize(header);
}
@Override
public byte getLocalStoragePolicyID() {
return HeaderFormat.getStoragePolicyID(header);
}
@Override
public byte getStoragePolicyID() {
byte id = getLocalStoragePolicyID();
if (id == BLOCK_STORAGE_POLICY_ID_UNSPECIFIED) {
id = this.getParent() != null ?
this.getParent().getStoragePolicyID() : id;
}
// For striped EC files, only suitable storage policies are supported. The
// currently supported policies are HOT, COLD and ALL_SSD. If the file was
// assigned any other policy, we treat the policy as
// BLOCK_STORAGE_POLICY_ID_UNSPECIFIED.
if (isStriped() && id != BLOCK_STORAGE_POLICY_ID_UNSPECIFIED
&& !ErasureCodingPolicyManager
.checkStoragePolicySuitableForECStripedMode(id)) {
// log the unsuitable id before resetting it, so that the message
// reports the policy that was actually rejected
if (LOG.isDebugEnabled()) {
LOG.debug("The current effective storage policy id : " + id
+ " is not suitable for striped mode EC file : " + getName()
+ ". So, just returning unspecified storage policy id");
}
id = HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED;
}
return id;
}
private void setStoragePolicyID(byte storagePolicyId) {
header = HeaderFormat.STORAGE_POLICY_ID.BITS.combine(storagePolicyId,
header);
}
public final void setStoragePolicyID(byte storagePolicyId,
int latestSnapshotId) throws QuotaExceededException {
recordModification(latestSnapshotId);
setStoragePolicyID(storagePolicyId);
}
/**
* @return The ID of the erasure coding policy on the file.
*/
@VisibleForTesting
@Override
public byte getErasureCodingPolicyID() {
if (isStriped()) {
return HeaderFormat.getECPolicyID(header);
}
return REPLICATION_POLICY_ID;
}
/**
* @return true if the file is in the striping layout.
*/
@VisibleForTesting
@Override
public boolean isStriped() {
return HeaderFormat.isStriped(header);
}
/**
* @return The type of the INodeFile, derived from the header bits.
*/
@VisibleForTesting
@Override
public BlockType getBlockType() {
return HeaderFormat.getBlockType(header);
}
@Override // INodeFileAttributes
public long getHeaderLong() {
return header;
}
/** @return the blocks of the file. */
@Override // BlockCollection
public BlockInfo[] getBlocks() {
return this.blocks;
}
/** @return blocks of the file corresponding to the snapshot. */
public BlockInfo[] getBlocks(int snapshot) {
if (snapshot == CURRENT_STATE_ID || getDiffs() == null) {
return getBlocks();
}
// find blocks stored in snapshot diffs (for truncate)
FileDiff diff = getDiffs().getDiffById(snapshot);
// note that currently FileDiff can only store contiguous blocks
BlockInfo[] snapshotBlocks = diff == null ? getBlocks() : diff.getBlocks();
if (snapshotBlocks != null) {
return snapshotBlocks;
}
// Blocks are not in the current snapshot
// Find next snapshot with blocks present or return current file blocks
snapshotBlocks = getDiffs().findLaterSnapshotBlocks(snapshot);
return (snapshotBlocks == null) ? getBlocks() : snapshotBlocks;
}
/**
* Append the blocks of the given inodes to this.blocks.
*/
void concatBlocks(INodeFile[] inodes, BlockManager bm) {
int size = this.blocks.length;
int totalAddedBlocks = 0;
for(INodeFile f : inodes) {
Preconditions.checkState(f.isStriped() == this.isStriped());
totalAddedBlocks += f.blocks.length;
}
BlockInfo[] newlist =
new BlockInfo[size + totalAddedBlocks];
System.arraycopy(this.blocks, 0, newlist, 0, size);
for(INodeFile in: inodes) {
System.arraycopy(in.blocks, 0, newlist, size, in.blocks.length);
size += in.blocks.length;
}
setBlocks(newlist);
for(BlockInfo b : blocks) {
b.setBlockCollectionId(getId());
short oldRepl = b.getReplication();
short repl = getPreferredBlockReplication();
if (oldRepl != repl) {
bm.setReplication(oldRepl, repl, b);
}
}
}
/**
* Add a block to the block list.
*/
void addBlock(BlockInfo newblock) {
Preconditions.checkArgument(newblock.isStriped() == this.isStriped());
if (this.blocks.length == 0) {
this.setBlocks(new BlockInfo[]{newblock});
} else {
int size = this.blocks.length;
BlockInfo[] newlist = new BlockInfo[size + 1];
System.arraycopy(this.blocks, 0, newlist, 0, size);
newlist[size] = newblock;
this.setBlocks(newlist);
}
}
/** Set the blocks. */
private void setBlocks(BlockInfo[] blocks) {
this.blocks = (blocks != null ? blocks : BlockInfo.EMPTY_ARRAY);
}
/** Clear all blocks of the file. */
public void clearBlocks() {
this.blocks = BlockInfo.EMPTY_ARRAY;
}
/**
* This method replaces blocks in a file with the supplied blocks.
* @param newBlocks List of new blocks.
*/
void replaceBlocks(BlockInfo[] newBlocks) {
this.blocks = Arrays.copyOf(newBlocks, newBlocks.length);
for (BlockInfo block : blocks) {
block.setBlockCollectionId(getId());
}
}
private void updateRemovedUnderConstructionFiles(
ReclaimContext reclaimContext) {
if (isUnderConstruction() && reclaimContext.removedUCFiles != null) {
reclaimContext.removedUCFiles.add(getId());
}
}
@Override
public void cleanSubtree(ReclaimContext reclaimContext,
final int snapshot, int priorSnapshotId) {
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf != null) {
// TODO: avoid calling getStoragePolicyID
sf.cleanFile(reclaimContext, this, snapshot, priorSnapshotId,
getStoragePolicyID());
updateRemovedUnderConstructionFiles(reclaimContext);
if (sf.getDiffs().isEmpty()) {
this.removeFeature(sf);
}
} else {
if (snapshot == CURRENT_STATE_ID) {
if (priorSnapshotId == NO_SNAPSHOT_ID) {
// this only happens when deleting the current file and it is not
// in any snapshot
destroyAndCollectBlocks(reclaimContext);
} else {
FileUnderConstructionFeature uc = getFileUnderConstructionFeature();
// when deleting the current file and it is in snapshot, we should
// clean the 0-sized block if the file is UC
if (uc != null) {
uc.cleanZeroSizeBlock(this, reclaimContext.collectedBlocks);
updateRemovedUnderConstructionFiles(reclaimContext);
}
}
}
}
}
@Override
public void destroyAndCollectBlocks(ReclaimContext reclaimContext) {
// TODO pass in the storage policy
reclaimContext.quotaDelta().add(computeQuotaUsage(reclaimContext.bsps,
false));
clearFile(reclaimContext);
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf != null) {
sf.getDiffs().destroyAndCollectSnapshotBlocks(
reclaimContext.collectedBlocks);
sf.clearDiffs();
}
updateRemovedUnderConstructionFiles(reclaimContext);
}
public void clearFile(ReclaimContext reclaimContext) {
if (blocks != null && reclaimContext.collectedBlocks != null) {
for (BlockInfo blk : blocks) {
reclaimContext.collectedBlocks.addDeleteBlock(blk);
}
}
clearBlocks();
if (getAclFeature() != null) {
AclStorage.removeAclFeature(getAclFeature());
}
clear();
reclaimContext.removedINodes.add(this);
}
@Override
public String getName() {
// Get the full path name of this inode.
return getFullPathName();
}
// This is the only place that needs to use the BlockStoragePolicySuite to
// derive the intended storage type usage for quota by storage type
@Override
public final QuotaCounts computeQuotaUsage(BlockStoragePolicySuite bsps,
byte blockStoragePolicyId, boolean useCache, int lastSnapshotId) {
final QuotaCounts counts = new QuotaCounts.Builder().nameSpace(1).build();
final BlockStoragePolicy bsp = (blockStoragePolicyId ==
BLOCK_STORAGE_POLICY_ID_UNSPECIFIED) ? null :
bsps.getPolicy(blockStoragePolicyId);
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf == null) {
counts.add(storagespaceConsumed(bsp));
return counts;
}
FileDiffList fileDiffList = sf.getDiffs();
int last = fileDiffList.getLastSnapshotId();
if (lastSnapshotId == Snapshot.CURRENT_STATE_ID
|| last == Snapshot.CURRENT_STATE_ID) {
counts.add(storagespaceConsumed(bsp));
return counts;
}
final long ssDeltaNoReplication;
short replication;
if (isStriped()) {
return computeQuotaUsageWithStriped(bsp, counts);
}
if (last < lastSnapshotId) {
ssDeltaNoReplication = computeFileSize(true, false);
replication = getFileReplication();
} else {
int sid = fileDiffList.getSnapshotById(lastSnapshotId);
ssDeltaNoReplication = computeFileSize(sid);
replication = getFileReplication(sid);
}
counts.addStorageSpace(ssDeltaNoReplication * replication);
if (bsp != null) {
List<StorageType> storageTypes = bsp.chooseStorageTypes(replication);
for (StorageType t : storageTypes) {
if (!t.supportTypeQuota()) {
continue;
}
counts.addTypeSpace(t, ssDeltaNoReplication);
}
}
return counts;
}
/**
* Compute the quota of a striped file. Note that EC files currently do not
* support append/hflush/hsync, so the file length recorded in snapshots
* should be the same as the current file length.
*/
public final QuotaCounts computeQuotaUsageWithStriped(
BlockStoragePolicy bsp, QuotaCounts counts) {
counts.addNameSpace(1);
counts.add(storagespaceConsumed(bsp));
return counts;
}
@Override
public final ContentSummaryComputationContext computeContentSummary(
int snapshotId, final ContentSummaryComputationContext summary) {
final ContentCounts counts = summary.getCounts();
counts.addContent(Content.FILE, 1);
final long fileLen = computeFileSize(snapshotId);
counts.addContent(Content.LENGTH, fileLen);
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf == null) {
counts.addContent(Content.DISKSPACE,
storagespaceConsumed(null).getStorageSpace());
} else if (isStriped()) {
counts.addContent(Content.DISKSPACE,
storagespaceConsumedStriped().getStorageSpace());
} else {
long diskSpaceQuota = getDiskSpaceQuota(counts, sf, snapshotId);
counts.addContent(Content.DISKSPACE, diskSpaceQuota);
}
if (getStoragePolicyID() != BLOCK_STORAGE_POLICY_ID_UNSPECIFIED){
BlockStoragePolicy bsp = summary.getBlockStoragePolicySuite().
getPolicy(getStoragePolicyID());
List<StorageType> storageTypes = bsp.chooseStorageTypes(getFileReplication());
for (StorageType t : storageTypes) {
if (!t.supportTypeQuota()) {
continue;
}
counts.addTypeSpace(t, fileLen);
}
}
return summary;
}
/**
* Compute the disk space consumed by the file as of the given snapshot.
*/
private long getDiskSpaceQuota(ContentCounts counts,
FileWithSnapshotFeature sf, int lastSnapshotId) {
FileDiffList fileDiffList = sf.getDiffs();
int last = fileDiffList.getLastSnapshotId();
if (lastSnapshotId == Snapshot.CURRENT_STATE_ID
|| last == Snapshot.CURRENT_STATE_ID) {
return storagespaceConsumed(null).getStorageSpace();
}
final long ssDeltaNoReplication;
short replication;
if (last < lastSnapshotId) {
ssDeltaNoReplication = computeFileSize(true, false);
replication = getFileReplication();
} else {
int sid = fileDiffList.getSnapshotById(lastSnapshotId);
ssDeltaNoReplication = computeFileSize(sid);
replication = getFileReplication(sid);
}
return ssDeltaNoReplication * replication;
}
/** The same as computeFileSize(CURRENT_STATE_ID). */
public final long computeFileSize() {
return computeFileSize(CURRENT_STATE_ID);
}
/**
* Compute the size of the current file if the given snapshot is
* CURRENT_STATE_ID; otherwise, get the file size from the given snapshot.
*/
public final long computeFileSize(int snapshotId) {
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (snapshotId != CURRENT_STATE_ID && sf != null) {
final FileDiff d = sf.getDiffs().getDiffById(snapshotId);
if (d != null) {
return d.getFileSize();
}
}
return computeFileSize(true, false);
}
/**
* Compute the size of the current file,
* excluding the last block if it is under construction.
*/
public final long computeFileSizeNotIncludingLastUcBlock() {
return computeFileSize(false, false);
}
/**
* Compute file size of the current file.
*
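* For example (illustrative), for a contiguous file whose blocks are one
* COMPLETE 128 MB block plus a 1 MB block under construction, this returns
* 129 MB for (true, false), 128 MB whenever includesLastUcBlock is false,
* and 128 MB plus the preferred block size for (true, true).
*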
* @param includesLastUcBlock
* If the last block is under construction, should it be included?
* @param usePreferredBlockSize4LastUcBlock
* If the last block is under construction, should we use actual
* block size or preferred block size?
* Note that usePreferredBlockSize4LastUcBlock is ignored
* if includesLastUcBlock == false.
* @return file size
*/
public final long computeFileSize(boolean includesLastUcBlock,
boolean usePreferredBlockSize4LastUcBlock) {
if (blocks.length == 0) {
return 0;
}
final int last = blocks.length - 1;
//check if the last block is BlockInfoUnderConstruction
BlockInfo lastBlk = blocks[last];
long size = lastBlk.getNumBytes();
if (!lastBlk.isComplete()) {
if (!includesLastUcBlock) {
size = 0;
} else if (usePreferredBlockSize4LastUcBlock) {
size = isStriped()?
getPreferredBlockSize() *
((BlockInfoStriped)lastBlk).getDataBlockNum() :
getPreferredBlockSize();
}
}
//sum other blocks
for (int i = 0; i < last; i++) {
size += blocks[i].getNumBytes();
}
return size;
}
/**
* Compute size consumed by all blocks of the current file,
* including blocks in its snapshots.
* Use preferred block size for the last block if it is under construction.
*/
public final QuotaCounts storagespaceConsumed(BlockStoragePolicy bsp) {
if (isStriped()) {
return storagespaceConsumedStriped();
} else {
return storagespaceConsumedContiguous(bsp);
}
}
// TODO: support EC with heterogeneous storage
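// For a complete striped block, spaceConsumed() accounts for data plus
// parity cells, e.g. a full RS-6-3 block group stores 3 parity blocks
// alongside its 6 data blocks; an incomplete group is charged the
// preferred block size for each of its internal blocks.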
public final QuotaCounts storagespaceConsumedStriped() {
QuotaCounts counts = new QuotaCounts.Builder().build();
for (BlockInfo b : blocks) {
Preconditions.checkState(b.isStriped());
long blockSize = b.isComplete() ?
((BlockInfoStriped)b).spaceConsumed() : getPreferredBlockSize() *
((BlockInfoStriped)b).getTotalBlockNum();
counts.addStorageSpace(blockSize);
}
return counts;
}
public final QuotaCounts storagespaceConsumedContiguous(
BlockStoragePolicy bsp) {
QuotaCounts counts = new QuotaCounts.Builder().build();
final Iterable<BlockInfo> blocks;
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf == null) {
blocks = Arrays.asList(getBlocks());
} else {
// Collect all distinct blocks
Set<BlockInfo> allBlocks = new HashSet<>(Arrays.asList(getBlocks()));
DiffList<FileDiff> diffs = sf.getDiffs().asList();
for(FileDiff diff : diffs) {
BlockInfo[] diffBlocks = diff.getBlocks();
if (diffBlocks != null) {
allBlocks.addAll(Arrays.asList(diffBlocks));
}
}
blocks = allBlocks;
}
final short replication = getPreferredBlockReplication();
for (BlockInfo b : blocks) {
long blockSize = b.isComplete() ? b.getNumBytes() :
getPreferredBlockSize();
counts.addStorageSpace(blockSize * replication);
if (bsp != null) {
List<StorageType> types = bsp.chooseStorageTypes(replication);
for (StorageType t : types) {
if (t.supportTypeQuota()) {
counts.addTypeSpace(t, blockSize);
}
}
}
}
return counts;
}
/**
* Return the penultimate allocated block for this file.
*/
BlockInfo getPenultimateBlock() {
if (blocks.length <= 1) {
return null;
}
return blocks[blocks.length - 2];
}
@Override
public BlockInfo getLastBlock() {
return blocks.length == 0 ? null: blocks[blocks.length-1];
}
@Override
public int numBlocks() {
return blocks.length;
}
@VisibleForTesting
@Override
public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
final int snapshotId) {
super.dumpTreeRecursively(out, prefix, snapshotId);
out.print(", fileSize=" + computeFileSize(snapshotId));
// only print the first block
out.print(", blocks=");
out.print(blocks.length == 0 ? null: blocks[0]);
out.println();
final FileWithSnapshotFeature snapshotFeature =
getFileWithSnapshotFeature();
if (snapshotFeature != null) {
if (prefix.length() >= 2) {
prefix.setLength(prefix.length() - 2);
prefix.append(" ");
}
out.print(prefix);
out.print(snapshotFeature);
}
out.println();
}
/**
* Remove full blocks at the end of the file that lie beyond the given
* maximum length.
* @return sum of the sizes of the remaining blocks
*/
public long collectBlocksBeyondMax(final long max,
final BlocksMapUpdateInfo collectedBlocks, Set<BlockInfo> toRetain) {
final BlockInfo[] oldBlocks = getBlocks();
if (oldBlocks == null) {
return 0;
}
// find the minimum n such that the size of the first n blocks >= max
int n = 0;
long size = 0;
for(; n < oldBlocks.length && max > size; n++) {
size += oldBlocks[n].getNumBytes();
}
if (n >= oldBlocks.length) {
return size;
}
// starting from block n, the data is beyond max.
// resize the array.
truncateBlocksTo(n);
// collect the blocks beyond max
if (collectedBlocks != null) {
for(; n < oldBlocks.length; n++) {
final BlockInfo del = oldBlocks[n];
if (toRetain == null || !toRetain.contains(del)) {
collectedBlocks.addDeleteBlock(del);
}
}
}
return size;
}
/**
* Compute the quota usage change for a truncate op.
* @param newLength the length for truncation
* TODO: properly handle striped blocks (HDFS-7622)
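*
* Illustrative example: truncating a file of three full 128 MB replicated
* blocks to 200 MB reclaims 128 MB * replication for the removed last
* block, while the block containing the new end of file is still charged
* at full size because truncate recovery copies it; with no snapshots the
* net delta is therefore -128 MB * replication.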
*/
void computeQuotaDeltaForTruncate(
long newLength, BlockStoragePolicy bsps,
QuotaCounts delta) {
final BlockInfo[] blocks = getBlocks();
if (blocks.length == 0) {
return;
}
long size = 0;
for (BlockInfo b : blocks) {
size += b.getNumBytes();
}
BlockInfo[] sblocks = null;
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf != null) {
FileDiff diff = sf.getDiffs().getLast();
sblocks = diff != null ? diff.getBlocks() : null;
}
for (int i = blocks.length - 1; i >= 0 && size > newLength;
size -= blocks[i].getNumBytes(), --i) {
BlockInfo bi = blocks[i];
long truncatedBytes;
if (size - newLength < bi.getNumBytes()) {
// Record a full block as the last block will be copied during
// recovery
truncatedBytes = bi.getNumBytes() - getPreferredBlockSize();
} else {
truncatedBytes = bi.getNumBytes();
}
// The block exists in a snapshot; add back the truncated bytes, since
// the snapshot still references the block
if (sblocks != null && i < sblocks.length && bi.equals(sblocks[i])) {
truncatedBytes -= bi.getNumBytes();
}
delta.addStorageSpace(-truncatedBytes * bi.getReplication());
if (bsps != null) {
List<StorageType> types = bsps.chooseStorageTypes(bi.getReplication());
for (StorageType t : types) {
if (t.supportTypeQuota()) {
delta.addTypeSpace(t, -truncatedBytes);
}
}
}
}
}
void truncateBlocksTo(int n) {
final BlockInfo[] newBlocks;
if (n == 0) {
newBlocks = BlockInfo.EMPTY_ARRAY;
} else {
newBlocks = new BlockInfo[n];
System.arraycopy(getBlocks(), 0, newBlocks, 0, n);
}
// set new blocks
setBlocks(newBlocks);
}
/**
* This function is only called when block list is stored in snapshot
* diffs. Note that this can only happen when truncation happens with
* snapshots. Since we do not support truncation with striped blocks,
* we only need to handle contiguous blocks here.
*/
public void collectBlocksBeyondSnapshot(BlockInfo[] snapshotBlocks,
BlocksMapUpdateInfo collectedBlocks) {
Preconditions.checkState(!isStriped());
BlockInfo[] oldBlocks = getBlocks();
if(snapshotBlocks == null || oldBlocks == null)
return;
// Skip blocks in common between the file and the snapshot
int n = 0;
while(n < oldBlocks.length && n < snapshotBlocks.length &&
oldBlocks[n] == snapshotBlocks[n]) {
n++;
}
truncateBlocksTo(n);
// Collect the remaining blocks of the file
while(n < oldBlocks.length) {
collectedBlocks.addDeleteBlock(oldBlocks[n++]);
}
}
/** Exclude blocks collected for deletion that belong to a snapshot. */
Set<BlockInfo> getSnapshotBlocksToRetain(int snapshotId) {
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if(sf == null) {
return null;
}
BlockInfo[] snapshotBlocks = getDiffs().findEarlierSnapshotBlocks(snapshotId);
if(snapshotBlocks == null) {
return null;
}
Set<BlockInfo> toRetain = new HashSet<>(snapshotBlocks.length);
Collections.addAll(toRetain, snapshotBlocks);
return toRetain;
}
/**
* @return true if the block is contained in the latest snapshot, false otherwise.
*/
boolean isBlockInLatestSnapshot(BlockInfo block) {
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (sf == null || sf.getDiffs() == null) {
return false;
}
BlockInfo[] snapshotBlocks = getDiffs()
.findEarlierSnapshotBlocks(getDiffs().getLastSnapshotId());
return snapshotBlocks != null &&
Arrays.asList(snapshotBlocks).contains(block);
}
/**
* Update Header with new Block Layout and Redundancy bits.
* @param newBlockLayoutPolicy new block layout policy.
* @param newStoragePolicy new storage policy ID.
*/
void updateHeaderWithNewPolicy(byte newBlockLayoutPolicy,
byte newStoragePolicy) {
this.header = HeaderFormat.toLong(
HeaderFormat.getPreferredBlockSize(header),
newBlockLayoutPolicy,
newStoragePolicy);
}
}