blob: 101a158fbc1d8998f6509255e5b7186062e34d52 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import com.google.common.base.Preconditions;
/**
* An anonymous reference to an inode.
*
* This class and its subclasses are used to support multiple access paths.
* A file/directory may have multiple access paths when it is stored in some
* snapshots and it is renamed/moved to other locations.
*
* For example,
* (1) Support we have /abc/foo, say the inode of foo is inode(id=1000,name=foo)
* (2) create snapshot s0 for /abc
* (3) mv /abc/foo /xyz/bar, i.e. inode(id=1000,name=...) is renamed from "foo"
* to "bar" and its parent becomes /xyz.
*
* Then, /xyz/bar and /abc/.snapshot/s0/foo are two different access paths to
* the same inode, inode(id=1000,name=bar).
*
* With references, we have the following
* - /abc has a child ref(id=1001,name=foo).
* - /xyz has a child ref(id=1002)
* - Both ref(id=1001,name=foo) and ref(id=1002) point to another reference,
* ref(id=1003,count=2).
* - Finally, ref(id=1003,count=2) points to inode(id=1000,name=bar).
*
* Note 1: For a reference without name, e.g. ref(id=1002), it uses the name
* of the referred inode.
* Note 2: getParent() always returns the parent in the current state, e.g.
* inode(id=1000,name=bar).getParent() returns /xyz but not /abc.
*/
public abstract class INodeReference extends INode {
/**
* Try to remove the given reference and then return the reference count.
* If the given inode is not a reference, return -1;
*/
public static int tryRemoveReference(INode inode) {
if (!inode.isReference()) {
return -1;
}
return removeReference(inode.asReference());
}
/**
* Remove the given reference and then return the reference count.
* If the referred inode is not a WithCount, return -1;
*/
private static int removeReference(INodeReference ref) {
final INode referred = ref.getReferredINode();
if (!(referred instanceof WithCount)) {
return -1;
}
WithCount wc = (WithCount) referred;
wc.removeReference(ref);
return wc.getReferenceCount();
}
/**
* When destroying a reference node (WithName or DstReference), we call this
* method to identify the snapshot which is the latest snapshot before the
* reference node's creation.
*/
static Snapshot getPriorSnapshot(INodeReference ref) {
WithCount wc = (WithCount) ref.getReferredINode();
WithName wn = null;
if (ref instanceof DstReference) {
wn = wc.getLastWithName();
} else if (ref instanceof WithName) {
wn = wc.getPriorWithName((WithName) ref);
}
if (wn != null) {
INode referred = wc.getReferredINode();
if (referred instanceof FileWithSnapshot) {
return ((FileWithSnapshot) referred).getDiffs().getPrior(
wn.lastSnapshotId);
} else if (referred instanceof INodeDirectoryWithSnapshot) {
return ((INodeDirectoryWithSnapshot) referred).getDiffs().getPrior(
wn.lastSnapshotId);
}
}
return null;
}
private INode referred;
public INodeReference(INode parent, INode referred) {
super(parent);
this.referred = referred;
}
public final INode getReferredINode() {
return referred;
}
public final void setReferredINode(INode referred) {
this.referred = referred;
}
@Override
public final boolean isReference() {
return true;
}
@Override
public final INodeReference asReference() {
return this;
}
@Override
public final boolean isFile() {
return referred.isFile();
}
@Override
public final INodeFile asFile() {
return referred.asFile();
}
@Override
public final boolean isDirectory() {
return referred.isDirectory();
}
@Override
public final INodeDirectory asDirectory() {
return referred.asDirectory();
}
@Override
public final boolean isSymlink() {
return referred.isSymlink();
}
@Override
public final INodeSymlink asSymlink() {
return referred.asSymlink();
}
@Override
public byte[] getLocalNameBytes() {
return referred.getLocalNameBytes();
}
@Override
public void setLocalName(byte[] name) {
referred.setLocalName(name);
}
@Override
public final long getId() {
return referred.getId();
}
@Override
public final PermissionStatus getPermissionStatus(Snapshot snapshot) {
return referred.getPermissionStatus(snapshot);
}
@Override
public final String getUserName(Snapshot snapshot) {
return referred.getUserName(snapshot);
}
@Override
final void setUser(String user) {
referred.setUser(user);
}
@Override
public final String getGroupName(Snapshot snapshot) {
return referred.getGroupName(snapshot);
}
@Override
final void setGroup(String group) {
referred.setGroup(group);
}
@Override
public final FsPermission getFsPermission(Snapshot snapshot) {
return referred.getFsPermission(snapshot);
}
@Override
void setPermission(FsPermission permission) {
referred.setPermission(permission);
}
@Override
public final long getModificationTime(Snapshot snapshot) {
return referred.getModificationTime(snapshot);
}
@Override
public final INode updateModificationTime(long mtime, Snapshot latest,
INodeMap inodeMap) throws QuotaExceededException {
return referred.updateModificationTime(mtime, latest, inodeMap);
}
@Override
public final void setModificationTime(long modificationTime) {
referred.setModificationTime(modificationTime);
}
@Override
public final long getAccessTime(Snapshot snapshot) {
return referred.getAccessTime(snapshot);
}
@Override
public final void setAccessTime(long accessTime) {
referred.setAccessTime(accessTime);
}
@Override
final INode recordModification(Snapshot latest, final INodeMap inodeMap)
throws QuotaExceededException {
referred.recordModification(latest, inodeMap);
// reference is never replaced
return this;
}
@Override // used by WithCount
public Quota.Counts cleanSubtree(Snapshot snapshot, Snapshot prior,
BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes)
throws QuotaExceededException {
return referred.cleanSubtree(snapshot, prior, collectedBlocks,
removedINodes);
}
@Override // used by WithCount
public void destroyAndCollectBlocks(
BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {
if (removeReference(this) <= 0) {
referred.destroyAndCollectBlocks(collectedBlocks, removedINodes);
}
}
@Override
public Content.Counts computeContentSummary(Content.Counts counts) {
return referred.computeContentSummary(counts);
}
@Override
public Quota.Counts computeQuotaUsage(Quota.Counts counts, boolean useCache,
int lastSnapshotId) {
return referred.computeQuotaUsage(counts, useCache, lastSnapshotId);
}
@Override
public final INode getSnapshotINode(Snapshot snapshot) {
return referred.getSnapshotINode(snapshot);
}
@Override
public final long getNsQuota() {
return referred.getNsQuota();
}
@Override
public final long getDsQuota() {
return referred.getDsQuota();
}
@Override
public final void clear() {
super.clear();
referred = null;
}
@Override
public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
final Snapshot snapshot) {
super.dumpTreeRecursively(out, prefix, snapshot);
if (this instanceof DstReference) {
out.print(", dstSnapshotId=" + ((DstReference) this).dstSnapshotId);
}
if (this instanceof WithCount) {
out.print(", count=" + ((WithCount)this).getReferenceCount());
}
out.println();
final StringBuilder b = new StringBuilder();
for(int i = 0; i < prefix.length(); i++) {
b.append(' ');
}
b.append("->");
getReferredINode().dumpTreeRecursively(out, b, snapshot);
}
public int getDstSnapshotId() {
return Snapshot.INVALID_ID;
}
/** An anonymous reference with reference count. */
public static class WithCount extends INodeReference {
private final List<WithName> withNameList = new ArrayList<WithName>();
/**
* Compare snapshot with IDs, where null indicates the current status thus
* is greater than any non-null snapshot.
*/
public static final Comparator<WithName> WITHNAME_COMPARATOR
= new Comparator<WithName>() {
@Override
public int compare(WithName left, WithName right) {
return left.lastSnapshotId - right.lastSnapshotId;
}
};
public WithCount(INodeReference parent, INode referred) {
super(parent, referred);
Preconditions.checkArgument(!referred.isReference());
referred.setParentReference(this);
}
public int getReferenceCount() {
int count = withNameList.size();
if (getParentReference() != null) {
count++;
}
return count;
}
/** Increment and then return the reference count. */
public void addReference(INodeReference ref) {
if (ref instanceof WithName) {
WithName refWithName = (WithName) ref;
int i = Collections.binarySearch(withNameList, refWithName,
WITHNAME_COMPARATOR);
Preconditions.checkState(i < 0);
withNameList.add(-i - 1, refWithName);
} else if (ref instanceof DstReference) {
setParentReference(ref);
}
}
/** Decrement and then return the reference count. */
public void removeReference(INodeReference ref) {
if (ref instanceof WithName) {
int i = Collections.binarySearch(withNameList, (WithName) ref,
WITHNAME_COMPARATOR);
if (i >= 0) {
withNameList.remove(i);
}
} else if (ref == getParentReference()) {
setParent(null);
}
}
WithName getLastWithName() {
return withNameList.size() > 0 ?
withNameList.get(withNameList.size() - 1) : null;
}
WithName getPriorWithName(WithName post) {
int i = Collections.binarySearch(withNameList, post, WITHNAME_COMPARATOR);
if (i > 0) {
return withNameList.get(i - 1);
} else if (i == 0 || i == -1) {
return null;
} else {
return withNameList.get(-i - 2);
}
}
@Override
public final void addSpaceConsumed(long nsDelta, long dsDelta,
boolean verify, int snapshotId) throws QuotaExceededException {
INodeReference parentRef = getParentReference();
if (parentRef != null) {
parentRef.addSpaceConsumed(nsDelta, dsDelta, verify, snapshotId);
}
addSpaceConsumedToRenameSrc(nsDelta, dsDelta, verify, snapshotId);
}
@Override
public final void addSpaceConsumedToRenameSrc(long nsDelta, long dsDelta,
boolean verify, int snapshotId) throws QuotaExceededException {
if (snapshotId != Snapshot.INVALID_ID) {
for (INodeReference.WithName withName : withNameList) {
if (withName.getLastSnapshotId() >= snapshotId) {
withName.addSpaceConsumed(nsDelta, dsDelta, verify, snapshotId);
break;
}
}
}
}
}
/** A reference with a fixed name. */
public static class WithName extends INodeReference {
private final byte[] name;
/**
* The id of the last snapshot in the src tree when this WithName node was
* generated. When calculating the quota usage of the referred node, only
* the files/dirs existing when this snapshot was taken will be counted for
* this WithName node and propagated along its ancestor path.
*/
private final int lastSnapshotId;
public WithName(INodeDirectory parent, WithCount referred, byte[] name,
int lastSnapshotId) {
super(parent, referred);
this.name = name;
this.lastSnapshotId = lastSnapshotId;
referred.addReference(this);
}
@Override
public final byte[] getLocalNameBytes() {
return name;
}
@Override
public final void setLocalName(byte[] name) {
throw new UnsupportedOperationException("Cannot set name: " + getClass()
+ " is immutable.");
}
public int getLastSnapshotId() {
return lastSnapshotId;
}
@Override
public final Content.Counts computeContentSummary(Content.Counts counts) {
//only count diskspace for WithName
final Quota.Counts q = Quota.Counts.newInstance();
computeQuotaUsage(q, false, lastSnapshotId);
counts.add(Content.DISKSPACE, q.get(Quota.DISKSPACE));
return counts;
}
@Override
public final Quota.Counts computeQuotaUsage(Quota.Counts counts,
boolean useCache, int lastSnapshotId) {
// if this.lastSnapshotId < lastSnapshotId, the rename of the referred
// node happened before the rename of its ancestor. This should be
// impossible since for WithName node we only count its children at the
// time of the rename.
Preconditions.checkState(this.lastSnapshotId >= lastSnapshotId);
final INode referred = this.getReferredINode().asReference()
.getReferredINode();
// we cannot use cache for the referred node since its cached quota may
// have already been updated by changes in the current tree
return referred.computeQuotaUsage(counts, false, this.lastSnapshotId);
}
@Override
public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
final BlocksMapUpdateInfo collectedBlocks,
final List<INode> removedINodes) throws QuotaExceededException {
// since WithName node resides in deleted list acting as a snapshot copy,
// the parameter snapshot must be non-null
Preconditions.checkArgument(snapshot != null);
// if prior is null, we need to check snapshot belonging to the previous
// WithName instance
if (prior == null) {
prior = getPriorSnapshot(this);
}
Quota.Counts counts = getReferredINode().cleanSubtree(snapshot, prior,
collectedBlocks, removedINodes);
INodeReference ref = getReferredINode().getParentReference();
if (ref != null) {
ref.addSpaceConsumed(-counts.get(Quota.NAMESPACE),
-counts.get(Quota.DISKSPACE), true, Snapshot.INVALID_ID);
}
return counts;
}
@Override
public void destroyAndCollectBlocks(BlocksMapUpdateInfo collectedBlocks,
final List<INode> removedINodes) {
Snapshot snapshot = getSelfSnapshot();
if (removeReference(this) <= 0) {
getReferredINode().destroyAndCollectBlocks(collectedBlocks,
removedINodes);
} else {
Snapshot prior = getPriorSnapshot(this);
INode referred = getReferredINode().asReference().getReferredINode();
if (snapshot != null) {
if (prior != null && snapshot.getId() <= prior.getId()) {
// the snapshot to be deleted has been deleted while traversing
// the src tree of the previous rename operation. This usually
// happens when rename's src and dst are under the same
// snapshottable directory. E.g., the following operation sequence:
// 1. create snapshot s1 on /test
// 2. rename /test/foo/bar to /test/foo2/bar
// 3. create snapshot s2 on /test
// 4. delete snapshot s2
return;
}
try {
Quota.Counts counts = referred.cleanSubtree(snapshot, prior,
collectedBlocks, removedINodes);
INodeReference ref = getReferredINode().getParentReference();
if (ref != null) {
ref.addSpaceConsumed(-counts.get(Quota.NAMESPACE),
-counts.get(Quota.DISKSPACE), true, Snapshot.INVALID_ID);
}
} catch (QuotaExceededException e) {
LOG.error("should not exceed quota while snapshot deletion", e);
}
}
}
}
private Snapshot getSelfSnapshot() {
INode referred = getReferredINode().asReference().getReferredINode();
Snapshot snapshot = null;
if (referred instanceof FileWithSnapshot) {
snapshot = ((FileWithSnapshot) referred).getDiffs().getPrior(
lastSnapshotId);
} else if (referred instanceof INodeDirectoryWithSnapshot) {
snapshot = ((INodeDirectoryWithSnapshot) referred).getDiffs().getPrior(
lastSnapshotId);
}
return snapshot;
}
}
public static class DstReference extends INodeReference {
/**
* Record the latest snapshot of the dst subtree before the rename. For
* later operations on the moved/renamed files/directories, if the latest
* snapshot is after this dstSnapshot, changes will be recorded to the
* latest snapshot. Otherwise changes will be recorded to the snapshot
* belonging to the src of the rename.
*
* {@link Snapshot#INVALID_ID} means no dstSnapshot (e.g., src of the
* first-time rename).
*/
private final int dstSnapshotId;
@Override
public final int getDstSnapshotId() {
return dstSnapshotId;
}
public DstReference(INodeDirectory parent, WithCount referred,
final int dstSnapshotId) {
super(parent, referred);
this.dstSnapshotId = dstSnapshotId;
referred.addReference(this);
}
@Override
public Quota.Counts cleanSubtree(Snapshot snapshot, Snapshot prior,
BlocksMapUpdateInfo collectedBlocks, List<INode> removedINodes)
throws QuotaExceededException {
if (snapshot == null && prior == null) {
Quota.Counts counts = Quota.Counts.newInstance();
this.computeQuotaUsage(counts, true);
destroyAndCollectBlocks(collectedBlocks, removedINodes);
return counts;
} else {
// if prior is null, we need to check snapshot belonging to the previous
// WithName instance
if (prior == null) {
prior = getPriorSnapshot(this);
}
if (snapshot != null && snapshot.equals(prior)) {
return Quota.Counts.newInstance();
}
return getReferredINode().cleanSubtree(snapshot, prior,
collectedBlocks, removedINodes);
}
}
/**
* {@inheritDoc}
* <br/>
* To destroy a DstReference node, we first remove its link with the
* referred node. If the reference number of the referred node is <= 0, we
* destroy the subtree of the referred node. Otherwise, we clean the
* referred node's subtree and delete everything created after the last
* rename operation, i.e., everything outside of the scope of the prior
* WithName nodes.
*/
@Override
public void destroyAndCollectBlocks(
BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {
if (removeReference(this) <= 0) {
getReferredINode().destroyAndCollectBlocks(collectedBlocks,
removedINodes);
} else {
// we will clean everything, including files, directories, and
// snapshots, that were created after this prior snapshot
Snapshot prior = getPriorSnapshot(this);
// prior must be non-null, otherwise we do not have any previous
// WithName nodes, and the reference number will be 0.
Preconditions.checkState(prior != null);
// identify the snapshot created after prior
Snapshot snapshot = getSelfSnapshot(prior);
INode referred = getReferredINode().asReference().getReferredINode();
if (referred instanceof FileWithSnapshot) {
// if referred is a file, it must be a FileWithSnapshot since we did
// recordModification before the rename
FileWithSnapshot sfile = (FileWithSnapshot) referred;
// make sure we mark the file as deleted
sfile.deleteCurrentFile();
if (snapshot != null) {
try {
referred.cleanSubtree(snapshot, prior, collectedBlocks,
removedINodes);
} catch (QuotaExceededException e) {
LOG.error("should not exceed quota while snapshot deletion", e);
}
}
} else if (referred instanceof INodeDirectoryWithSnapshot) {
// similarly, if referred is a directory, it must be an
// INodeDirectoryWithSnapshot
INodeDirectoryWithSnapshot sdir =
(INodeDirectoryWithSnapshot) referred;
try {
INodeDirectoryWithSnapshot.destroyDstSubtree(sdir, snapshot, prior,
collectedBlocks, removedINodes);
} catch (QuotaExceededException e) {
LOG.error("should not exceed quota while snapshot deletion", e);
}
}
}
}
private Snapshot getSelfSnapshot(final Snapshot prior) {
WithCount wc = (WithCount) getReferredINode().asReference();
INode referred = wc.getReferredINode();
Snapshot lastSnapshot = null;
if (referred instanceof FileWithSnapshot) {
lastSnapshot = ((FileWithSnapshot) referred).getDiffs()
.getLastSnapshot();
} else if (referred instanceof INodeDirectoryWithSnapshot) {
lastSnapshot = ((INodeDirectoryWithSnapshot) referred)
.getLastSnapshot();
}
if (lastSnapshot != null && !lastSnapshot.equals(prior)) {
return lastSnapshot;
} else {
return null;
}
}
}
}