blob: b3c608395c19fcfabf9beee5e103657300625876 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.crypto.CipherSuite;
import org.apache.hadoop.crypto.CryptoProtocolVersion;
import org.apache.hadoop.fs.FileEncryptionInfo;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.fs.XAttr;
import org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.XAttrHelper;
import org.apache.hadoop.hdfs.hdfsdb.Options;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
import org.apache.hadoop.hdfs.protocol.EncryptionZone;
import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException;
import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.util.ByteArray;
import org.apache.hadoop.hdfs.util.EnumCounters;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import static org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY;
import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.*;
import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_ENCRYPTION_ZONE;
import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_FILE_ENCRYPTION_INFO;
import static org.apache.hadoop.hdfs.server.namenode.INodeId.INVALID_INODE_ID;
import static org.apache.hadoop.hdfs.server.namenode.INodeId.ROOT_INODE_ID;
import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID;
import static org.apache.hadoop.util.Time.now;
/**
* Both FSDirectory and FSNamesystem manage the state of the namespace.
* FSDirectory is a pure in-memory data structure, all of whose operations
* happen entirely in memory. In contrast, FSNamesystem persists the operations
* to the disk.
* @see org.apache.hadoop.hdfs.server.namenode.FSNamesystem
**/
@InterfaceAudience.Private
public class FSDirectory implements Closeable {
// SLF4J logger for this class.
static final Logger LOG = LoggerFactory.getLogger(FSDirectory.class);
/**
 * Builds the in-memory root directory inode.
 * The root uses the file system owner's permissions with mode 0755, gets a
 * quota feature initialised with the default namespace and storage-space
 * quotas, has the snapshottable feature added, and a snapshot quota of 0.
 *
 * @param namesystem source of the file system owner permissions
 * @return the newly created root directory
 */
private static INodeDirectory createRoot(FSNamesystem namesystem) {
  final PermissionStatus rootPermissions =
      namesystem.createFsOwnerPermissions(new FsPermission((short) 0755));
  final INodeDirectory root = new INodeDirectory(
      ROOT_INODE_ID, INodeDirectory.ROOT_NAME, rootPermissions, 0L);

  final DirectoryWithQuotaFeature defaultQuota =
      new DirectoryWithQuotaFeature.Builder()
          .nameSpaceQuota(DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA)
          .storageSpaceQuota(
              DirectoryWithQuotaFeature.DEFAULT_STORAGE_SPACE_QUOTA)
          .build();
  root.addDirectoryWithQuotaFeature(defaultQuota);

  root.addSnapshottableFeature();
  root.setSnapshotQuota(0);
  return root;
}
/**
 * Serializes the root inode for the flat namespace store.
 * The record carries the root inode id, an invalid parent id, the file
 * system owner's user and group (translated to ids through {@code ugid}),
 * mode 0755 and the current time as modification time.
 *
 * @param namesystem source of the file system owner permissions
 * @return the encoded root inode
 */
private ByteString createRootForFlatNS(FSNamesystem namesystem) {
  final FsPermission rootMode = new FsPermission((short) 0755);
  final PermissionStatus owner = namesystem.createFsOwnerPermissions(rootMode);
  return new FlatINode.Builder()
      .id(ROOT_INODE_ID)
      .parentId(INVALID_INODE_ID)
      .userId(ugid.getId(owner.getUserName()))
      .groupId(ugid.getId(owner.getGroupName()))
      .permission((short) 0755)
      .mtime(now())
      .build();
}
// Test hook. It is not read in this part of the file; presumably it gates
// reserved-name checking elsewhere -- TODO confirm.
@VisibleForTesting
static boolean CHECK_RESERVED_FILE_NAMES = true;
// Name of the reserved top-level path component.
public final static String DOT_RESERVED_STRING = ".reserved";
// Path separator followed by DOT_RESERVED_STRING.
public final static String DOT_RESERVED_PATH_PREFIX = Path.SEPARATOR
+ DOT_RESERVED_STRING;
// DOT_RESERVED_STRING converted with DFSUtil.string2Bytes.
public final static byte[] DOT_RESERVED =
DFSUtil.string2Bytes(DOT_RESERVED_STRING);
// Component name "raw" and its byte form.
private final static String RAW_STRING = "raw";
private final static byte[] RAW = DFSUtil.string2Bytes(RAW_STRING);
// Component name ".inodes" and its byte form.
public final static String DOT_INODES_STRING = ".inodes";
public final static byte[] DOT_INODES =
DFSUtil.string2Bytes(DOT_INODES_STRING);
// Root of the in-memory inode tree; assigned in the constructor via createRoot().
INodeDirectory rootDir;
// Owning namesystem, supplied to the constructor.
private final FSNamesystem namesystem;
private volatile boolean skipQuotaCheck = false; //skip while consuming edits
// Maximum length of a path component in bytes; 0 disables the check
// (see verifyMaxComponentLength).
private final int maxComponentLength;
// Maximum number of children per directory (see verifyMaxDirItems).
private final int maxDirItems;
private final int lsLimit; // max list limit
private final int contentCountLimit; // max content summary counts per run
// Configured from DFS_CONTENT_SUMMARY_SLEEP_MICROSEC_KEY.
private final long contentSleepMicroSec;
private final INodeMap inodeMap; // Synchronized by dirLock
private long yieldCount = 0; // keep track of lock yield count.
private final int inodeXAttrsLimit; //inode xattrs max limit
// lock to protect the directory and BlockMap
private final ReentrantReadWriteLock dirLock;
// Configured from DFS_PERMISSIONS_ENABLED_KEY.
private final boolean isPermissionEnabled;
/**
* Support for ACLs is controlled by a configuration flag. If the
* configuration flag is false, then the NameNode will reject all
* ACL-related operations.
*/
private final boolean aclsEnabled;
// Configured from DFS_NAMENODE_XATTRS_ENABLED_KEY.
private final boolean xattrsEnabled;
// Maximum size of an xattr; must be >= 0 and 0 is logged as unlimited.
private final int xattrMaxSize;
// precision of access times.
private final long accessTimePrecision;
// whether setStoragePolicy is allowed.
private final boolean storagePolicyEnabled;
// whether quota by storage type is allowed
private final boolean quotaByStorageTypeEnabled;
// Short user name of the user that constructed this object.
private final String fsOwnerShortUserName;
// Configured from DFS_PERMISSIONS_SUPERUSERGROUP_KEY.
private final String supergroup;
// Created in the constructor; presumably the inode id generator -- TODO confirm.
private final INodeId inodeId;
// Edit log obtained from the namesystem.
private final FSEditLog editLog;
// Optional provider installed through setINodeAttributeProvider().
private INodeAttributeProvider attributeProvider;
// True when "dfs.partialns" is set; selects the LevelDB-backed transactions.
private final boolean enableLevelDb;
// Opened only when enableLevelDb is true, otherwise null.
private final org.apache.hadoop.hdfs.hdfsdb.DB levelDb;
/** @return the LevelDB store, or {@code null} when "dfs.partialns" is disabled. */
org.apache.hadoop.hdfs.hdfsdb.DB getLevelDb() {
return levelDb;
}
/**
* Installs the inode attribute provider, replacing any previous one.
* @param provider the provider to store; not validated here
*/
public void setINodeAttributeProvider(INodeAttributeProvider provider) {
attributeProvider = provider;
}
// utility methods to acquire and release read lock and write lock
/** Acquires the read side of the directory lock. */
void readLock() {
this.dirLock.readLock().lock();
}
/** Releases the read side of the directory lock. */
void readUnlock() {
this.dirLock.readLock().unlock();
}
/** Acquires the write side of the directory lock. */
void writeLock() {
this.dirLock.writeLock().lock();
}
/** Releases the write side of the directory lock. */
void writeUnlock() {
this.dirLock.writeLock().unlock();
}
/** @return true if the current thread holds the directory write lock. */
boolean hasWriteLock() {
return this.dirLock.isWriteLockedByCurrentThread();
}
/**
 * @return true if the current thread holds the directory lock in read mode
 *         (read hold count above zero) or in write mode.
 */
boolean hasReadLock() {
  if (this.dirLock.getReadHoldCount() > 0) {
    return true;
  }
  return hasWriteLock();
}
/** @return the current thread's read hold count on the directory lock. */
public int getReadHoldCount() {
return this.dirLock.getReadHoldCount();
}
/** @return the current thread's write hold count on the directory lock. */
public int getWriteHoldCount() {
return this.dirLock.getWriteHoldCount();
}
// Encryption zone manager; created in the constructor.
@VisibleForTesting
public final EncryptionZoneManager ezManager;
/**
* Caches frequently used file names used in {@link INode} to reuse
* byte[] objects and reduce heap usage.
*/
private final NameCache<ByteArray> nameCache;
// In-memory flat namespace store; null when "dfs.partialns" is enabled.
private final DB db;
// Mapping user / group name into id
private final StringMap ugid = new StringMap();
/** @return the in-memory store, or {@code null} when "dfs.partialns" is enabled. */
DB db() {
return db;
}
/** @return the map translating user and group names into ids. */
StringMap ugid() {
return ugid;
}
/**
 * Creates a read-write transaction for the active backing store.
 *
 * @return a {@code LevelDBRWTransaction} when LevelDB is enabled, otherwise a
 *         plain {@code RWTransaction}
 */
RWTransaction newRWTransaction() {
  if (enableLevelDb) {
    return new LevelDBRWTransaction(this);
  }
  return new RWTransaction(this);
}
/**
 * Creates a read-only transaction for the active backing store.
 *
 * @return a {@code LevelDBROTransaction} bound to {@code levelDb} when
 *         LevelDB is enabled, otherwise a plain {@code ROTransaction}
 */
public ROTransaction newROTransaction() {
  if (enableLevelDb) {
    return new LevelDBROTransaction(this, levelDb);
  }
  return new ROTransaction(this);
}
/**
* @return a new {@code ReplayTransaction} bound to this directory. Unlike the
* other transaction factories this one does not depend on the LevelDB flag.
*/
public ReplayTransaction newReplayTransaction() {
return new ReplayTransaction(this);
}
/**
 * Creates the directory, reads its limits and feature flags from
 * {@code conf}, and seeds the backing store with the root inode.
 *
 * When "dfs.partialns" is true the namespace is stored in the LevelDB-based
 * store located at "dfs.partialns.path"; otherwise an in-memory {@code DB}
 * guarded by the directory lock is used.
 *
 * @param ns the owning namesystem
 * @param conf configuration to read settings from
 * @throws IOException if the current user cannot be determined or the
 *         backing store cannot be opened or written
 * @throws IllegalArgumentException if a configured limit is out of range, or
 *         if "dfs.partialns" is enabled without "dfs.partialns.path"
 */
FSDirectory(FSNamesystem ns, Configuration conf) throws IOException {
  this.dirLock = new ReentrantReadWriteLock(true); // fair
  this.inodeId = new INodeId();
  rootDir = createRoot(ns);
  inodeMap = INodeMap.newInstance(rootDir);
  this.isPermissionEnabled = conf.getBoolean(
      DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY,
      DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT);
  this.fsOwnerShortUserName =
      UserGroupInformation.getCurrentUser().getShortUserName();
  this.supergroup = conf.get(
      DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
      DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
  this.aclsEnabled = conf.getBoolean(
      DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY,
      DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_DEFAULT);
  LOG.info("ACLs enabled? " + aclsEnabled);
  this.xattrsEnabled = conf.getBoolean(
      DFSConfigKeys.DFS_NAMENODE_XATTRS_ENABLED_KEY,
      DFSConfigKeys.DFS_NAMENODE_XATTRS_ENABLED_DEFAULT);
  LOG.info("XAttrs enabled? " + xattrsEnabled);
  this.xattrMaxSize = conf.getInt(
      DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_KEY,
      DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_DEFAULT);
  Preconditions.checkArgument(xattrMaxSize >= 0,
      "Cannot set a negative value for the maximum size of an xattr (%s).",
      DFSConfigKeys.DFS_NAMENODE_MAX_XATTR_SIZE_KEY);
  final String unlimited = xattrMaxSize == 0 ? " (unlimited)" : "";
  LOG.info("Maximum size of an xattr: " + xattrMaxSize + unlimited);
  this.accessTimePrecision = conf.getLong(
      DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
      DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
  this.storagePolicyEnabled =
      conf.getBoolean(DFS_STORAGE_POLICY_ENABLED_KEY,
                      DFS_STORAGE_POLICY_ENABLED_DEFAULT);
  this.quotaByStorageTypeEnabled =
      conf.getBoolean(DFS_QUOTA_BY_STORAGETYPE_ENABLED_KEY,
                      DFS_QUOTA_BY_STORAGETYPE_ENABLED_DEFAULT);
  // A non-positive list limit falls back to the default.
  int configuredLimit = conf.getInt(
      DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
  this.lsLimit = configuredLimit>0 ?
      configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT;
  this.contentCountLimit = conf.getInt(
      DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY,
      DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_DEFAULT);
  this.contentSleepMicroSec = conf.getLong(
      DFSConfigKeys.DFS_CONTENT_SUMMARY_SLEEP_MICROSEC_KEY,
      DFSConfigKeys.DFS_CONTENT_SUMMARY_SLEEP_MICROSEC_DEFAULT);
  // filesystem limits
  this.maxComponentLength = conf.getInt(
      DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY,
      DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_DEFAULT);
  this.maxDirItems = conf.getInt(
      DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY,
      DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_DEFAULT);
  this.inodeXAttrsLimit = conf.getInt(
      DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_KEY,
      DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_DEFAULT);
  Preconditions.checkArgument(this.inodeXAttrsLimit >= 0,
      "Cannot set a negative limit on the number of xattrs per inode (%s).",
      DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_KEY);
  // We need a maximum maximum because by default, PB limits message sizes
  // to 64MB. This means we can only store approximately 6.7 million entries
  // per directory, but let's use 6.4 million for some safety.
  final int MAX_DIR_ITEMS = 64 * 100 * 1000;
  Preconditions.checkArgument(maxDirItems > 0 && maxDirItems <= MAX_DIR_ITEMS,
      "Cannot set " + DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY + " to a value less than 1 or greater than " + MAX_DIR_ITEMS);
  int threshold = conf.getInt(
      DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY,
      DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT);
  NameNode.LOG.info("Caching file names occuring more than " + threshold
      + " times");
  nameCache = new NameCache<ByteArray>(threshold);
  namesystem = ns;
  this.editLog = ns.getEditLog();
  ezManager = new EncryptionZoneManager(this, conf);
  this.enableLevelDb = conf.getBoolean("dfs.partialns", false);
  if (enableLevelDb) {
    String dbPath = conf.get("dfs.partialns.path");
    // conf.get() yields null for an unset key; reject that here instead of
    // handing a null path to the store.
    Preconditions.checkArgument(dbPath != null,
        "dfs.partialns is enabled but dfs.partialns.path is not set.");
    int writeBufferSize = conf.getInt("dfs.partialns.writebuffer",
                                      4096 * 1024);
    long blockCacheSize = conf.getLong(
        "dfs.partialns.blockcache", 0);
    Options options = new Options().createIfMissing(true)
        .writeBufferSize(writeBufferSize);
    // A block cache size of 0 leaves the store's own default in place.
    if (blockCacheSize != 0) {
      options.blockCacheSize(blockCacheSize);
    }
    this.levelDb = org.apache.hadoop.hdfs.hdfsdb.DB.open(options, dbPath);
    // Seed the store with the root inode.
    try (RWTransaction tx = newRWTransaction().begin()) {
      tx.putINode(ROOT_INODE_ID, createRootForFlatNS(ns));
      tx.commit();
    }
    this.db = null;
  } else {
    this.db = new DB(dirLock);
    // TODO: Load fsimage
    db.addRoot(createRootForFlatNS(ns));
    this.levelDb = null;
  }
}
/** @return the namesystem this directory was constructed with. */
FSNamesystem getFSNamesystem() {
return namesystem;
}
/** @return the block manager of the owning namesystem. */
BlockManager getBlockManager() {
return getFSNamesystem().getBlockManager();
}
/** @return the in-memory root directory inode held in {@code rootDir}. */
public INodeDirectory getRoot() {
return rootDir;
}
/** @return the storage policy suite exposed by the block manager. */
public BlockStoragePolicySuite getBlockStoragePolicySuite() {
return getBlockManager().getStoragePolicySuite();
}
/** @return the configured value of the permissions-enabled flag. */
boolean isPermissionEnabled() {
return isPermissionEnabled;
}
/** @return the configured value of the ACLs-enabled flag. */
boolean isAclsEnabled() {
return aclsEnabled;
}
/** @return the configured value of the xattrs-enabled flag. */
boolean isXattrsEnabled() {
return xattrsEnabled;
}
/** @return the configured maximum xattr size; 0 is treated as unlimited. */
int getXattrMaxSize() { return xattrMaxSize; }
/** @return the configured value of the storage-policy-enabled flag. */
boolean isStoragePolicyEnabled() {
return storagePolicyEnabled;
}
/** @return true if the configured access-time precision is greater than zero. */
boolean isAccessTimeSupported() {
return accessTimePrecision > 0;
}
/** @return the configured access-time precision. */
long getAccessTimePrecision() {
return accessTimePrecision;
}
/** @return the configured value of the quota-by-storage-type flag. */
boolean isQuotaByStorageTypeEnabled() {
return quotaByStorageTypeEnabled;
}
/** @return the list limit; always positive because the constructor falls back to the default. */
int getLsLimit() {
return lsLimit;
}
/** @return the configured content summary count limit. */
int getContentCountLimit() {
return contentCountLimit;
}
/** @return the configured content summary sleep time in microseconds. */
long getContentSleepMicroSec() {
return contentSleepMicroSec;
}
/** @return the configured maximum number of xattrs per inode (never negative). */
int getInodeXAttrsLimit() {
return inodeXAttrsLimit;
}
/** @return the edit log obtained from the namesystem at construction time. */
FSEditLog getEditLog() {
return editLog;
}
/**
* Shutdown the filestore.
* Currently a no-op: nothing is released here.
* NOTE(review): when "dfs.partialns" is enabled the constructor opens
* {@code levelDb}, which this method does not close -- confirm whether the
* handle is released elsewhere.
*/
@Override
public void close() throws IOException {}
/** Calls {@code nameCache.initialized()} while holding the directory write lock. */
void markNameCacheInitialized() {
writeLock();
try {
nameCache.initialized();
} finally {
// always release, even if initialized() throws
writeUnlock();
}
}
/** @return true while quota verification is disabled. */
boolean shouldSkipQuotaChecks() {
return skipQuotaCheck;
}
/** Enable quota verification by clearing the volatile {@code skipQuotaCheck} flag. */
void enableQuotaChecks() {
skipQuotaCheck = false;
}
/** Disable quota verification by setting the volatile {@code skipQuotaCheck} flag. */
void disableQuotaChecks() {
skipQuotaCheck = true;
}
/**
 * Wrapper around the three-argument {@code resolvePath}. If {@code path} is a
 * reserved raw name and permission checking is enabled, the caller must first
 * pass the superuser check.
 *
 * @param pc The permission checker used when resolving path.
 * @param path The path to resolve.
 * @param pathComponents path components corresponding to the path
 * @return if the path indicates an inode, return path after replacing up to
 *         &lt;inodeid&gt; with the corresponding path of the inode, else the
 *         path in {@code path} as is. If the path refers to a path in the
 *         "raw" directory, return the non-raw pathname.
 * @throws FileNotFoundException
 * @throws AccessControlException
 */
String resolvePath(FSPermissionChecker pc, String path, byte[][] pathComponents)
    throws FileNotFoundException, AccessControlException {
  final boolean rawPath = isReservedRawName(path);
  if (rawPath && isPermissionEnabled) {
    pc.checkSuperuserPrivilege();
  }
  return resolvePath(path, pathComponents, this);
}
/**
 * Checks, under the directory read lock, whether the last inode of the path
 * is a directory that has at least one child in the path's snapshot.
 *
 * @return true if the path is a non-empty directory; otherwise, return false.
 */
boolean isNonEmptyDirectory(INodesInPath inodesInPath) {
  readLock();
  try {
    final INode last = inodesInPath.getLastINode();
    final boolean isDirectory = last != null && last.isDirectory();
    if (!isDirectory) {
      // missing, or present but not a directory
      return false;
    }
    final int snapshotId = inodesInPath.getPathSnapshotId();
    return !last.asDirectory().getChildrenList(snapshotId).isEmpty();
  } finally {
    readUnlock();
  }
}
/**
 * Checks whether the last inode of a flat-namespace path is a directory with
 * at least one child.
 *
 * @param tx transaction used to open the children view
 * @param iip the resolved path
 * @return true if the last inode exists, is a directory and has children;
 *         false if it is missing, not a directory, or empty
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         opening or closing the children view
 */
static boolean isNonEmptyDirectory(
    Transaction tx, FlatINodesInPath iip) {
  final FlatINode inode = iip.getLastINode();
  if (inode == null || !inode.isDirectory()) {
    // not found or not a directory; no need to open a children view
    return false;
  }
  try (DBChildrenView children = tx.childrenView(inode.id())) {
    return !children.isEmpty();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Check whether the filepath could be created: after normalization it must
 * start with "/", must not end with "/", and the path must not resolve to an
 * existing inode.
 *
 * @throws SnapshotAccessControlException if path is in RO snapshot
 */
boolean isValidToCreate(String src, INodesInPath iip)
    throws SnapshotAccessControlException {
  final String normalized = normalizePath(src);
  if (!normalized.startsWith("/")) {
    return false;
  }
  if (normalized.endsWith("/")) {
    return false;
  }
  return iip.getLastINode() == null;
}
/**
 * Check whether the path specifies a directory.
 * The path is normalized first and then looked up under the read lock.
 *
 * @return true if the path resolves to an inode that is a directory
 */
boolean isDir(String src) throws UnresolvedLinkException {
  final String normalized = normalizePath(src);
  readLock();
  try {
    final INode node = getINode(normalized, false);
    if (node == null) {
      return false;
    }
    return node.isDirectory();
  } finally {
    readUnlock();
  }
}
/**
 * Updates namespace, storagespace and typespaces consumed for all
 * directories until the parent directory of file represented by path.
 * Runs under the directory write lock and always requests a quota check.
 *
 * @param iip the INodesInPath instance containing all the INodes for
 *            updating quota usage
 * @param nsDelta the delta change of namespace
 * @param ssDelta the delta change of storage space consumed without replication
 * @param replication the replication factor of the block consumption change
 * @throws QuotaExceededException if the new count violates any quota limit
 * @throws FileNotFoundException if path does not exist.
 */
void updateSpaceConsumed(INodesInPath iip, long nsDelta, long ssDelta, short replication)
    throws QuotaExceededException, FileNotFoundException,
    UnresolvedLinkException, SnapshotAccessControlException {
  writeLock();
  try {
    final boolean missing = iip.getLastINode() == null;
    if (missing) {
      throw new FileNotFoundException("Path not found: " + iip.getPath());
    }
    updateCount(iip, nsDelta, ssDelta, replication, true);
  } finally {
    writeUnlock();
  }
}
/**
 * Applies the negation of a {@code QuotaDelta} in three steps: to the
 * ancestors along {@code iip}, to the ancestors of every inode in the
 * delta's update map, and directly to each directory in its quota-dir map.
 *
 * @param iip path whose ancestors receive the main delta
 * @param quotaDelta the collected quota changes
 * @param check whether quota limits are verified for the path updates
 * @throws QuotaExceededException if a verified update violates a quota
 */
public void updateCount(INodesInPath iip, INode.QuotaDelta quotaDelta,
    boolean check) throws QuotaExceededException {
  // 1. main path
  final QuotaCounts mainCounts = quotaDelta.getCountsCopy();
  updateCount(iip, iip.length() - 1, mainCounts.negation(), check);

  // 2. inodes reachable through other paths
  final Map<INode, QuotaCounts> otherPaths = quotaDelta.getUpdateMap();
  for (Map.Entry<INode, QuotaCounts> other : otherPaths.entrySet()) {
    final INodesInPath otherPath = INodesInPath.fromINode(other.getKey());
    final QuotaCounts negated = other.getValue().negation();
    updateCount(otherPath, otherPath.length() - 1, negated, check);
  }

  // 3. quota directories updated in place, without verification
  for (Map.Entry<INodeDirectory, QuotaCounts> dirEntry :
      quotaDelta.getQuotaDirMap().entrySet()) {
    final DirectoryWithQuotaFeature feature =
        dirEntry.getKey().getDirectoryWithQuotaFeature();
    feature.addSpaceConsumed2Cache(dirEntry.getValue().negation());
  }
}
/**
 * Update the quota usage after deletion. The quota update is only necessary
 * when image/edits have been loaded and the file/dir to be deleted is not
 * contained in snapshots; in every other case this method does nothing.
 */
void updateCountForDelete(final INode inode, final INodesInPath iip) {
  if (!getFSNamesystem().isImageLoaded()) {
    return;
  }
  if (inode.isInLatestSnapshot(iip.getLatestSnapshotId())) {
    return;
  }
  final QuotaCounts usage =
      inode.computeQuotaUsage(getBlockStoragePolicySuite());
  unprotectedUpdateCount(iip, iip.length() - 1, usage.negation());
}
/**
 * Update usage count without replication factor change.
 * The storage-space delta is {@code ssDelta * replication}; type-space
 * deltas are derived from the file's storage policy.
 *
 * @param iip path whose last inode must be a file
 * @throws QuotaExceededException if {@code checkQuota} is set and a quota
 *         would be violated
 */
void updateCount(INodesInPath iip, long nsDelta, long ssDelta, short replication,
    boolean checkQuota) throws QuotaExceededException {
  final INodeFile file = iip.getLastINode().asFile();
  final EnumCounters<StorageType> typeDeltas = getStorageTypeDeltas(
      file.getStoragePolicyID(), ssDelta, replication, replication);
  final QuotaCounts delta = new QuotaCounts.Builder()
      .nameSpace(nsDelta)
      .storageSpace(ssDelta * replication)
      .typeSpaces(typeDeltas)
      .build();
  updateCount(iip, iip.length() - 1, delta, checkQuota);
}
/**
 * Update usage count with replication factor change due to setReplication.
 * The storage-space delta is {@code ssDelta * (newRep - oldRep)}; type-space
 * deltas are derived from the file's storage policy for both factors.
 *
 * @param iip path whose last inode must be a file
 * @throws QuotaExceededException if {@code checkQuota} is set and a quota
 *         would be violated
 */
void updateCount(
    INodesInPath iip, long nsDelta, long ssDelta, short oldRep, short newRep,
    boolean checkQuota) throws QuotaExceededException {
  final INodeFile file = iip.getLastINode().asFile();
  final EnumCounters<StorageType> typeDeltas = getStorageTypeDeltas(
      file.getStoragePolicyID(), ssDelta, oldRep, newRep);
  final QuotaCounts delta = new QuotaCounts.Builder()
      .nameSpace(nsDelta)
      .storageSpace(ssDelta * (newRep - oldRep))
      .typeSpaces(typeDeltas)
      .build();
  updateCount(iip, iip.length() - 1, delta, checkQuota);
}
/**
 * Update count of each inode with quota. Does nothing while the image is
 * still loading. Verification is skipped when {@code checkQuota} is false
 * or quota checks are globally disabled.
 *
 * @param iip inodes in a path
 * @param numOfINodes the number of inodes to update starting from index 0;
 *                    capped at the path length
 * @param counts the count of space/namespace/type usage to be update
 * @param checkQuota if true then check if quota is exceeded
 * @throws QuotaExceededException if the new count violates any quota limit
 */
void updateCount(INodesInPath iip, int numOfINodes,
    QuotaCounts counts, boolean checkQuota)
    throws QuotaExceededException {
  assert hasWriteLock();
  if (!namesystem.isImageLoaded()) {
    // still initializing: neither check nor update quotas
    return;
  }
  final int limit = Math.min(numOfINodes, iip.length());
  final boolean verify = checkQuota && !skipQuotaCheck;
  if (verify) {
    verifyQuota(iip, limit, counts, null);
  }
  unprotectedUpdateCount(iip, limit, counts);
}
/**
* Update the quota usage of each inode WITHOUT verifying quota limits.
* Delegates to {@link #updateCount(INodesInPath, int, QuotaCounts, boolean)}
* with checking turned off; a QuotaExceededException is therefore not
* expected and is only logged if it occurs.
*/
void updateCountNoQuotaCheck(INodesInPath inodesInPath,
int numOfINodes, QuotaCounts counts) {
assert hasWriteLock();
try {
updateCount(inodesInPath, numOfINodes, counts, false);
} catch (QuotaExceededException e) {
// should be unreachable because checking is disabled above
NameNode.LOG.error("BUG: unexpected exception ", e);
}
}
/**
 * Updates quota without verification: adds {@code counts} to the usage
 * cache of every inode in the first {@code numOfINodes} positions that has
 * a quota set. Callers responsibility is to make sure quota is not exceeded.
 */
static void unprotectedUpdateCount(INodesInPath inodesInPath,
    int numOfINodes, QuotaCounts counts) {
  for (int idx = 0; idx < numOfINodes; idx++) {
    final INode node = inodesInPath.getINode(idx);
    if (!node.isQuotaSet()) {
      continue;
    }
    // a directory with quota
    node.asDirectory().getDirectoryWithQuotaFeature()
        .addSpaceConsumed2Cache(counts);
  }
}
/**
 * Computes per-storage-type space deltas for a change of {@code dsDelta}.
 * With an unspecified storage policy the result is empty. Otherwise, if the
 * replication changes, {@code dsDelta} is subtracted for every quota-capable
 * type chosen for {@code oldRep}; then {@code dsDelta} is added for every
 * quota-capable type chosen for {@code newRep}.
 *
 * @throws IllegalArgumentException if the replication changes, a
 *         quota-capable old type exists and {@code dsDelta} is not positive
 */
public EnumCounters<StorageType> getStorageTypeDeltas(byte storagePolicyID,
    long dsDelta, short oldRep, short newRep) {
  final EnumCounters<StorageType> deltas =
      new EnumCounters<StorageType>(StorageType.class);
  // Storage type and its quota are only available when storage policy is set
  if (storagePolicyID == HdfsConstants.BLOCK_STORAGE_POLICY_ID_UNSPECIFIED) {
    return deltas;
  }
  final BlockStoragePolicy policy =
      getBlockManager().getStoragePolicy(storagePolicyID);

  if (oldRep != newRep) {
    for (StorageType type : policy.chooseStorageTypes(oldRep)) {
      if (type.supportTypeQuota()) {
        Preconditions.checkArgument(dsDelta > 0);
        deltas.add(type, -dsDelta);
      }
    }
  }

  for (StorageType type : policy.chooseStorageTypes(newRep)) {
    if (type.supportTypeQuota()) {
      deltas.add(type, dsDelta);
    }
  }
  return deltas;
}
/**
 * Return the name of the path represented by inodes at [0, pos].
 * A leading root inode contributes nothing by itself; the path consisting
 * of the root alone is the path separator.
 */
static String getFullPathName(INode[] inodes, int pos) {
  final INode first = inodes[0];
  final boolean startsAtRoot = first.isRoot();
  if (startsAtRoot && pos == 0) {
    return Path.SEPARATOR;
  }
  final StringBuilder path = new StringBuilder();
  if (!startsAtRoot) {
    path.append(first.getLocalName());
  }
  for (int idx = 1; idx <= pos; idx++) {
    path.append(Path.SEPARATOR_CHAR);
    path.append(inodes[idx].getLocalName());
  }
  return path.toString();
}
/**
 * @return the relative path of an inode from one of its ancestors,
 *         represented by an array of inodes ordered from the ancestor's
 *         child down to {@code inode}; {@code null} if the parent chain is
 *         found shorter on the second walk than on the first.
 */
private static INode[] getRelativePathINodes(INode inode, INode ancestor) {
  // first walk: count the hops from this inode up to the ancestor
  int depth = 0;
  INode cursor = inode;
  while (cursor != null && !cursor.equals(ancestor)) {
    depth++;
    cursor = cursor.getParent();
  }

  // second walk: fill the array back to front
  final INode[] chain = new INode[depth];
  cursor = inode;
  for (int slot = depth - 1; slot >= 0; slot--) {
    if (cursor == null) {
      NameNode.stateChangeLog.warn("Could not get full path."
          + " Corresponding file might have deleted already.");
      return null;
    }
    chain[slot] = cursor;
    cursor = cursor.getParent();
  }
  return chain;
}
/**
* @return the inodes from the top of the parent chain down to {@code inode};
* obtained by asking for the relative path with a {@code null} ancestor.
*/
private static INode[] getFullPathINodes(INode inode) {
return getRelativePathINodes(inode, null);
}
/**
 * Return the full path name of the specified inode, or the empty string
 * when the inode chain could not be collected.
 */
static String getFullPathName(INode inode) {
  final INode[] chain = getFullPathINodes(inode);
  if (chain == null) {
    // can only happen when called without holding the lock
    return "";
  }
  return getFullPathName(chain, chain.length - 1);
}
/**
* Add the given child to the namespace.
* The child's name is passed to {@code cacheName} before the write lock is
* taken; the insertion itself runs under the write lock with quota checking
* enabled.
* @param existing the INodesInPath containing all the ancestral INodes
* @param child the new INode to add
* @return a new INodesInPath instance containing the new child INode. Null
* if the adding fails.
* @throws QuotaExceededException is thrown if it violates quota limit
*/
INodesInPath addINode(INodesInPath existing, INode child)
throws QuotaExceededException, UnresolvedLinkException {
cacheName(child);
writeLock();
try {
return addLastINode(existing, child, true);
} finally {
writeUnlock();
}
}
/**
 * Verify quota for adding or moving a new INode with required
 * namespace and storagespace to a given position. Nothing is checked when
 * none of the deltas is positive.
 *
 * @param iip INodes corresponding to a path
 * @param pos position where a new INode will be added
 * @param deltas needed namespace, storagespace and storage types
 * @param commonAncestor Last node in inodes array that is a common ancestor
 *          for a INode that is being moved from one location to the other.
 *          Pass null if a node is not being moved.
 * @throws QuotaExceededException if quota limit is exceeded; the exception
 *          carries the path of the directory whose quota was violated.
 */
static void verifyQuota(INodesInPath iip, int pos, QuotaCounts deltas,
    INode commonAncestor) throws QuotaExceededException {
  final boolean consumesQuota = deltas.getNameSpace() > 0
      || deltas.getStorageSpace() > 0
      || !deltas.getTypeSpaces().allLessOrEqual(0L);
  if (!consumesQuota) {
    // quota is being freed or not being consumed
    return;
  }

  // walk the existing components from the deepest one towards the root
  for (int i = Math.min(pos, iip.length()) - 1; i >= 0; i--) {
    final INode node = iip.getINode(i);
    if (commonAncestor == node) {
      // Stop checking for quota when common ancestor is reached
      return;
    }
    final DirectoryWithQuotaFeature quota =
        node.asDirectory().getDirectoryWithQuotaFeature();
    if (quota == null) {
      continue;
    }
    // a directory with quota
    try {
      quota.verifyQuota(deltas);
    } catch (QuotaExceededException e) {
      final List<INode> all = iip.getReadOnlyINodes();
      e.setPathName(getFullPathName(all.toArray(new INode[all.size()]), i));
      throw e;
    }
  }
}
/**
 * Verify if the inode name is legal: the snapshot directory name is
 * reserved. While the image is still loading, the message additionally
 * asks for a rename before upgrade.
 *
 * @throws HadoopIllegalArgumentException if the name is reserved
 */
void verifyINodeName(byte[] childName) throws HadoopIllegalArgumentException {
  if (!Arrays.equals(DOT_SNAPSHOT_DIR_BYTES, childName)) {
    return;
  }
  final StringBuilder message = new StringBuilder("\"")
      .append(HdfsConstants.DOT_SNAPSHOT_DIR)
      .append("\" is a reserved name.");
  if (!namesystem.isImageLoaded()) {
    message.append(" Please rename it before upgrade.");
  }
  throw new HadoopIllegalArgumentException(message.toString());
}
/**
 * Verify child's name for fs limit.
 * A limit of 0 disables the check. While the image is still loading, a
 * violation is only logged so that existing over-long names can be replayed.
 *
 * @param childName byte[] containing new child name
 * @param parentPath String containing parent path
 * @throws PathComponentTooLongException child's name is too long.
 */
void verifyMaxComponentLength(byte[] childName, String parentPath)
    throws PathComponentTooLongException {
  if (maxComponentLength == 0) {
    return;
  }
  final int length = childName.length;
  if (length <= maxComponentLength) {
    return;
  }
  final PathComponentTooLongException e = new PathComponentTooLongException(
      maxComponentLength, length, parentPath,
      DFSUtil.bytes2String(childName));
  if (namesystem.isImageLoaded()) {
    throw e;
  }
  // Do not throw if edits log is still being processed.
  // The message names this method; it previously pointed at verifyINodeName.
  NameNode.LOG.error("ERROR in FSDirectory.verifyMaxComponentLength", e);
}
/**
 * Verify children size for fs limit, using the parent's current-state
 * children list. While the image is still loading, a violation is only
 * logged.
 *
 * @throws MaxDirectoryItemsExceededException too many children.
 */
void verifyMaxDirItems(INodeDirectory parent, String parentPath)
    throws MaxDirectoryItemsExceededException {
  final int count = parent.getChildrenList(CURRENT_STATE_ID).size();
  if (count < maxDirItems) {
    return;
  }
  final MaxDirectoryItemsExceededException tooMany =
      new MaxDirectoryItemsExceededException(maxDirItems, count);
  if (!namesystem.isImageLoaded()) {
    // Do not throw if edits log is still being processed
    NameNode.LOG.error("FSDirectory.verifyMaxDirItems: "
        + tooMany.getLocalizedMessage());
    return;
  }
  tooMany.setPathName(parentPath);
  throw tooMany;
}
/**
 * Add a child to the end of the path specified by INodesInPath.
 * Quota usage of the ancestors is updated before the child is attached and
 * rolled back if attaching fails.
 *
 * @param existing path whose last inode is the parent directory
 * @param inode the child to add
 * @param checkQuota whether quota and fs limits are enforced
 * @return an INodesInPath instance containing the new INode, or null if the
 *         parent refused the child
 * @throws QuotaExceededException if a quota would be violated
 */
@VisibleForTesting
public INodesInPath addLastINode(INodesInPath existing, INode inode,
    boolean checkQuota) throws QuotaExceededException {
  assert existing.getLastINode() != null &&
      existing.getLastINode().isDirectory();

  final int pos = existing.length();
  // Disallow creation of /.reserved. This may be created when loading
  // editlog/fsimage during upgrade since /.reserved was a valid name in older
  // release. This may also be called when a user tries to create a file
  // or directory /.reserved.
  final boolean directlyUnderRoot =
      pos == 1 && existing.getINode(0) == rootDir;
  if (directlyUnderRoot && isReservedName(inode)) {
    throw new HadoopIllegalArgumentException(
        "File name \"" + inode.getLocalName() + "\" is reserved and cannot "
            + "be created. If this is during upgrade change the name of the "
            + "existing file or directory to another name before upgrading "
            + "to the new release.");
  }

  final INodeDirectory parentDir = existing.getINode(pos - 1).asDirectory();
  // The filesystem limits are not really quotas, so this check may appear
  // odd. It's because a rename operation deletes the src, tries to add
  // to the dest, if that fails, re-adds the src from whence it came.
  // The rename code disables the quota when it's restoring to the
  // original location because a quota violation would cause the item
  // to go "poof". The fs limits must be bypassed for the same reason.
  if (checkQuota) {
    final String parentPath = existing.getPath();
    verifyMaxComponentLength(inode.getLocalNameBytes(), parentPath);
    verifyMaxDirItems(parentDir, parentPath);
  }
  // the name itself is verified unconditionally
  verifyINodeName(inode.getLocalNameBytes());

  final QuotaCounts usage =
      inode.computeQuotaUsage(getBlockStoragePolicySuite());
  updateCount(existing, pos, usage, checkQuota);

  // must be sampled before the child is attached to its new parent
  final boolean isRename = (inode.getParent() != null);
  final boolean attached;
  try {
    attached = parentDir.addChild(inode, true, existing.getLatestSnapshotId());
  } catch (QuotaExceededException e) {
    updateCountNoQuotaCheck(existing, pos, usage.negation());
    throw e;
  }
  if (!attached) {
    updateCountNoQuotaCheck(existing, pos, usage.negation());
    return null;
  }

  if (!isRename) {
    AclStorage.copyINodeDefaultAcl(inode);
  }
  addToInodeMap(inode);
  return INodesInPath.append(existing, inode, inode.getLocalNameBytes());
}
/**
 * Adds a child with quota and fs-limit checks disabled.
 *
 * @return the extended path, or null if adding failed or an unexpected
 *         QuotaExceededException was raised (which is logged)
 */
INodesInPath addLastINodeNoQuotaCheck(INodesInPath existing, INode i) {
  INodesInPath result = null;
  try {
    result = addLastINode(existing, i, false);
  } catch (QuotaExceededException e) {
    NameNode.LOG.warn("FSDirectory.addChildNoQuotaCheck - unexpected", e);
  }
  return result;
}
/**
 * Remove the last inode in the path from the namespace.
 * Note: the caller needs to update the ancestors' quota count.
 *
 * @return -1 for failing to remove;
 *          0 for removing a reference whose referred inode has other
 *            reference nodes;
 *          1 otherwise.
 */
@VisibleForTesting
public long removeLastINode(final INodesInPath iip) {
  final int latestSnapshot = iip.getLatestSnapshotId();
  final INode target = iip.getLastINode();
  final INodeDirectory parentDir = iip.getINode(-2).asDirectory();

  if (!parentDir.removeChild(target, latestSnapshot)) {
    return -1;
  }
  if (target.isInLatestSnapshot(latestSnapshot)) {
    return 1;
  }
  // only attempted when the inode is not captured by the latest snapshot
  return INodeReference.tryRemoveReference(target) > 0 ? 0 : 1;
}
/**
 * Strips a single trailing "/" from the path, unless the path is only one
 * character long.
 *
 * @param src the path to normalize
 * @return the path without its trailing slash, or {@code src} unchanged
 */
static String normalizePath(String src) {
  final boolean hasTrailingSlash = src.length() > 1 && src.endsWith("/");
  return hasTrailingSlash ? src.substring(0, src.length() - 1) : src;
}
/** @return the current value of the yield counter. */
@VisibleForTesting
public long getYieldCount() {
  return this.yieldCount;
}
/**
 * Increases the yield counter.
 *
 * @param value amount added to the counter
 */
void addYieldCount(long value) {
  this.yieldCount += value;
}
/** @return the inode map held by this directory. */
public INodeMap getINodeMap() {
  return this.inodeMap;
}
/**
 * FSEditLogLoader implementation.
 * Unlike FSNamesystem.truncate, this will not schedule block recovery.
 *
 * Resolves {@code src}, truncates the file to {@code newLength} without a
 * quota delta, and, when the new length is not on a block boundary, prepares
 * the last block for truncation and drops the old last block from the blocks
 * map if it was replaced and is not referenced by the latest snapshot.
 *
 * @param src path of the file to truncate
 * @param clientName passed through to prepareFileForTruncate
 * @param clientMachine passed through to prepareFileForTruncate
 * @param newLength target length of the file
 * @param mtime modification time to record on the file
 * @param truncateBlock the block expected to result from the truncation;
 *        asserted to be null iff the truncation is on a block boundary
 */
void unprotectedTruncate(String src, String clientName, String clientMachine,
long newLength, long mtime, Block truncateBlock)
throws UnresolvedLinkException, QuotaExceededException,
SnapshotAccessControlException, IOException {
INodesInPath iip = getINodesInPath(src, true);
INodeFile file = iip.getLastINode().asFile();
BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
// No quota delta is tracked here (null is passed for delta).
boolean onBlockBoundary =
unprotectedTruncate(iip, newLength, collectedBlocks, mtime, null);
if(! onBlockBoundary) {
// Remember the current last block before it is prepared for truncation.
BlockInfoContiguous oldBlock = file.getLastBlock();
Block tBlk =
getFSNamesystem().prepareFileForTruncate(iip,
clientName, clientMachine, file.computeFileSize() - newLength,
truncateBlock);
assert Block.matchingIdAndGenStamp(tBlk, truncateBlock) &&
tBlk.getNumBytes() == truncateBlock.getNumBytes() :
"Should be the same block.";
// The old block is removed only if it was replaced by a different block
// and the latest snapshot does not still contain it.
if(oldBlock.getBlockId() != tBlk.getBlockId() &&
!file.isBlockInLatestSnapshot(oldBlock)) {
getBlockManager().removeBlockFromMap(oldBlock);
}
}
assert onBlockBoundary == (truncateBlock == null) :
"truncateBlock is null iff on block boundary: " + truncateBlock;
// Finally release the blocks collected by the truncation.
getFSNamesystem().removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
}
/**
 * Runs {@code unprotectedTruncate} while holding the directory write lock.
 *
 * @return the value returned by {@code unprotectedTruncate}
 */
boolean truncate(INodesInPath iip, long newLength,
    BlocksMapUpdateInfo collectedBlocks,
    long mtime, QuotaCounts delta)
    throws IOException {
  writeLock();
  try {
    final boolean onBlockBoundary =
        unprotectedTruncate(iip, newLength, collectedBlocks, mtime, delta);
    return onBlockBoundary;
  } finally {
    writeUnlock();
  }
}
/**
 * Truncate has the following properties:
 * 1.) Any block deletions occur now.
 * 2.) INode length is truncated now - new clients can only read up to
 *     the truncated length.
 * 3.) INode will be set to UC and lastBlock set to UNDER_RECOVERY.
 * 4.) NN will trigger DN truncation recovery and waits for DNs to report.
 * 5.) File is considered UNDER_RECOVERY until truncation recovery completes.
 * 6.) Soft and hard Lease expiration require truncation recovery to complete.
 *
 * The write lock must be held by the caller (asserted).
 *
 * @return true if on the block boundary or false if recovery is needed
 */
boolean unprotectedTruncate(INodesInPath iip, long newLength,
    BlocksMapUpdateInfo collectedBlocks,
    long mtime, QuotaCounts delta) throws IOException {
  assert hasWriteLock();
  final INodeFile target = iip.getLastINode().asFile();
  final int snapshotId = iip.getLatestSnapshotId();
  target.recordModification(snapshotId, true);
  verifyQuotaForTruncate(iip, target, newLength, delta);
  final long remaining =
      target.collectBlocksBeyondMax(newLength, collectedBlocks);
  target.excludeSnapshotBlocks(snapshotId, collectedBlocks);
  target.setModificationTime(mtime);
  // On a block boundary exactly when nothing beyond newLength remains.
  return remaining == newLength;
}
/**
 * Computes the quota delta of truncating {@code file} to {@code newLength}
 * and verifies it against the quotas along the path. Nothing is done while
 * the image is not loaded or when quota checks are to be skipped.
 *
 * @throws QuotaExceededException if the verification fails
 */
private void verifyQuotaForTruncate(INodesInPath iip, INodeFile file,
    long newLength, QuotaCounts delta) throws QuotaExceededException {
  final boolean imageLoaded = getFSNamesystem().isImageLoaded();
  if (!imageLoaded || shouldSkipQuotaChecks()) {
    // Do not check quota if edit log is still being processed
    return;
  }
  final byte policyId = file.getStoragePolicyID();
  final BlockStoragePolicy storagePolicy =
      getBlockStoragePolicySuite().getPolicy(policyId);
  file.computeQuotaDeltaForTruncate(newLength, storagePolicy, delta);
  readLock();
  try {
    final int lastIndex = iip.length() - 1;
    verifyQuota(iip, lastIndex, delta, null);
  } finally {
    readUnlock();
  }
}
/**
 * Registers an inode in the inode map and, for every encryption-zone XAttr
 * found on a non-symlink inode, registers the zone with the encryption zone
 * manager.
 * This method is always called with writeLock of FSDirectory held.
 *
 * @param inode the inode to register; ignored unless it is an
 *              INodeWithAdditionalFields
 */
public final void addToInodeMap(INode inode) {
  if (!(inode instanceof INodeWithAdditionalFields)) {
    return;
  }
  inodeMap.put(inode);
  if (inode.isSymlink()) {
    return;
  }
  final XAttrFeature xaf = inode.getXAttrFeature();
  if (xaf == null) {
    return;
  }
  for (XAttr xattr : xaf.getXAttrs()) {
    final String xaName = XAttrHelper.getPrefixName(xattr);
    if (!CRYPTO_XATTR_ENCRYPTION_ZONE.equals(xaName)) {
      continue;
    }
    try {
      final HdfsProtos.ZoneEncryptionInfoProto ezProto =
          HdfsProtos.ZoneEncryptionInfoProto.parseFrom(xattr.getValue());
      ezManager.unprotectedAddEncryptionZone(inode.getId(),
          PBHelper.convert(ezProto.getSuite()),
          PBHelper.convert(ezProto.getCryptoProtocolVersion()),
          ezProto.getKeyName());
    } catch (InvalidProtocolBufferException e) {
      // Best effort: a corrupt XAttr must not prevent the inode from being
      // registered, but keep the cause in the log for diagnosis.
      NameNode.LOG.warn("Error parsing protocol buffer of " +
          "EZ XAttr " + xattr.getName(), e);
    }
  }
}
/**
 * Removes the given inodes from the inode map and drops any encryption zone
 * registered under their ids. Entries that are not
 * INodeWithAdditionalFields (including null entries) are skipped.
 * This method is always called with writeLock of FSDirectory held.
 *
 * @param inodes inodes to remove; may be null, in which case nothing happens
 */
public final void removeFromInodeMap(List<? extends INode> inodes) {
  if (inodes == null) {
    return;
  }
  for (INode candidate : inodes) {
    // instanceof is false for null, so null entries are skipped as well.
    if (!(candidate instanceof INodeWithAdditionalFields)) {
      continue;
    }
    inodeMap.remove(candidate);
    ezManager.removeEncryptionZone(candidate.getId());
  }
}
/**
 * Deletes the inodes with the given ids through the transaction and drops
 * any encryption zone registered under each id.
 *
 * @param tx transaction used to delete the inodes
 * @param inodes ids of the inodes to delete
 */
public final void removeFromInodeMap(RWTransaction tx, List<Long> inodes) {
  for (final long id : inodes) {
    tx.deleteINode(id);
    ezManager.removeEncryptionZone(id);
  }
}
/**
 * Look up an inode in the inode map by its id, under the read lock.
 *
 * @param id The given id
 * @return The inode associated with the given id
 */
public INode getInode(long id) {
  readLock();
  try {
    final INode found = inodeMap.get(id);
    return found;
  } finally {
    readUnlock();
  }
}
/** @return the size reported by the inode map. */
@VisibleForTesting
int getInodeMapSize() {
  final int size = inodeMap.size();
  return size;
}
/**
 * @return the namespace count recorded as consumed by the root directory's
 *         quota feature, read under the read lock
 */
long totalInodes() {
  readLock();
  try {
    final long namespaceCount = rootDir.getDirectoryWithQuotaFeature()
        .getSpaceConsumed().getNameSpace();
    return namespaceCount;
  } finally {
    readUnlock();
  }
}
/**
 * Reset the entire namespace tree.
 *
 * Under the write lock: creates a fresh root, empties the inode map and
 * re-registers the new root in it, resets the name cache, and sets the inode
 * id generator back to INodeId.LAST_RESERVED_ID.
 */
void reset() {
writeLock();
try {
rootDir = createRoot(getFSNamesystem());
// The map must be cleared before the new root is registered in it.
inodeMap.clear();
addToInodeMap(rootDir);
nameCache.reset();
inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
} finally {
writeUnlock();
}
}
/**
 * Asks the encryption zone manager, under the read lock, whether the path
 * is in an encryption zone.
 *
 * @param iip the resolved path to check
 * @return the answer of {@code ezManager.isInAnEZ}
 */
boolean isInAnEZ(INodesInPath iip)
    throws UnresolvedLinkException, SnapshotAccessControlException {
  readLock();
  try {
    final boolean inZone = ezManager.isInAnEZ(iip);
    return inZone;
  } finally {
    readUnlock();
  }
}
/**
 * Fetches, under the read lock, the key name the encryption zone manager
 * reports for the path.
 *
 * @param iip the resolved path
 * @return the result of {@code ezManager.getKeyName}
 */
String getKeyName(INodesInPath iip) {
  readLock();
  try {
    final String keyName = ezManager.getKeyName(iip);
    return keyName;
  } finally {
    readUnlock();
  }
}
/**
 * Delegates encryption zone creation to the encryption zone manager while
 * holding the write lock.
 *
 * @return the XAttr returned by {@code ezManager.createEncryptionZone}
 */
XAttr createEncryptionZone(String src, CipherSuite suite,
    CryptoProtocolVersion version, String keyName)
    throws IOException {
  writeLock();
  try {
    final XAttr zoneXAttr =
        ezManager.createEncryptionZone(src, suite, version, keyName);
    return zoneXAttr;
  } finally {
    writeUnlock();
  }
}
/**
 * Looks up, under the read lock, the encryption zone for the given path.
 *
 * @param iip the resolved path
 * @return the result of {@code ezManager.getEZINodeForPath}
 */
EncryptionZone getEZForPath(INodesInPath iip) {
  readLock();
  try {
    final EncryptionZone zone = ezManager.getEZINodeForPath(iip);
    return zone;
  } finally {
    readUnlock();
  }
}
/**
 * Lists encryption zones through the encryption zone manager while holding
 * the read lock.
 *
 * @param prevId passed through to {@code ezManager.listEncryptionZones}
 * @return the batch returned by the manager
 */
BatchedListEntries<EncryptionZone> listEncryptionZones(long prevId)
    throws IOException {
  readLock();
  try {
    final BatchedListEntries<EncryptionZone> batch =
        ezManager.listEncryptionZones(prevId);
    return batch;
  } finally {
    readUnlock();
  }
}
/**
 * Set the FileEncryptionInfo for an INode.
 *
 * The info is serialized to its protobuf form and stored as the
 * CRYPTO_XATTR_FILE_ENCRYPTION_INFO XAttr on {@code src}, using the CREATE
 * flag, under the write lock.
 *
 * @param src path of the file
 * @param info encryption info to store
 */
void setFileEncryptionInfo(String src, FileEncryptionInfo info)
    throws IOException {
  // Build the xattr payload before taking the lock.
  final byte[] encoded = PBHelper.convertPerFileEncInfo(info).toByteArray();
  final List<XAttr> attrsToSet = Lists.newArrayListWithCapacity(1);
  attrsToSet.add(
      XAttrHelper.buildXAttr(CRYPTO_XATTR_FILE_ENCRYPTION_INFO, encoded));

  writeLock();
  try {
    FSDirXAttrOp.unprotectedSetXAttrs(this, src, attrsToSet,
        EnumSet.of(XAttrSetFlag.CREATE));
  } finally {
    writeUnlock();
  }
}
/**
 * This function combines the per-file encryption info (obtained
 * from the inode's XAttrs), and the encryption info from its zone, and
 * returns a consolidated FileEncryptionInfo instance. Null is returned
 * for non-encrypted files.
 *
 * @param inode inode of the file
 * @param snapshotId ID of the snapshot that
 *                   we want to get encryption info from
 * @param iip inodes in the path containing the file, passed in to
 *            avoid obtaining the list of inodes again; if iip is
 *            null then the list of inodes is resolved again from the
 *            inode's full path name
 * @return consolidated file encryption info; null for non-encrypted files,
 *         for non-file inodes, and when the file has no encryption XAttr
 * @throws IOException if the per-file encryption XAttr cannot be parsed or
 *                     the path cannot be resolved
 */
FileEncryptionInfo getFileEncryptionInfo(INode inode, int snapshotId,
    INodesInPath iip) throws IOException {
  if (!inode.isFile()) {
    return null;
  }
  readLock();
  try {
    if (iip == null) {
      // Honor the documented contract: resolve the path when the caller
      // did not supply it, instead of dereferencing null below.
      iip = getINodesInPath(inode.getFullPathName(), true);
    }
    final EncryptionZone encryptionZone = getEZForPath(iip);
    if (encryptionZone == null) {
      // not an encrypted file
      return null;
    }
    if (encryptionZone.getPath() == null
        || encryptionZone.getPath().isEmpty()) {
      // Only reported; processing continues with the zone's settings.
      if (NameNode.LOG.isDebugEnabled()) {
        NameNode.LOG.debug("Encryption zone " +
            encryptionZone.getPath() + " does not have a valid path.");
      }
    }

    final CryptoProtocolVersion version = encryptionZone.getVersion();
    final CipherSuite suite = encryptionZone.getSuite();
    final String keyName = encryptionZone.getKeyName();

    final XAttr fileXAttr = FSDirXAttrOp.unprotectedGetXAttrByName(inode,
        snapshotId, CRYPTO_XATTR_FILE_ENCRYPTION_INFO);
    if (fileXAttr == null) {
      NameNode.LOG.warn("Could not find encryption XAttr for file " +
          iip.getPath() + " in encryption zone " + encryptionZone.getPath());
      return null;
    }

    try {
      final HdfsProtos.PerFileEncryptionInfoProto fileProto =
          HdfsProtos.PerFileEncryptionInfoProto.parseFrom(
              fileXAttr.getValue());
      return PBHelper.convert(fileProto, suite, version, keyName);
    } catch (InvalidProtocolBufferException e) {
      throw new IOException("Could not parse file encryption info for " +
          "inode " + inode, e);
    }
  } finally {
    readUnlock();
  }
}
/**
 * Returns the last inode of the path, insisting that it exists.
 *
 * @param iip the resolved path
 * @return the last inode, never null
 * @throws FileNotFoundException if the path has no last inode
 */
static INode resolveLastINode(INodesInPath iip) throws FileNotFoundException {
  final INode last = iip.getLastINode();
  if (last != null) {
    return last;
  }
  throw new FileNotFoundException("cannot find " + iip.getPath());
}
/**
 * Caches frequently used file names to reuse file name objects and
 * reduce heap size. Only file inodes take part; when the cache hands back
 * an entry, the inode's local name is replaced by the cached bytes.
 *
 * @param inode the inode whose name may be cached
 */
void cacheName(INode inode) {
  if (inode.isFile()) {
    final ByteArray candidate = new ByteArray(inode.getLocalNameBytes());
    final ByteArray cached = nameCache.put(candidate);
    if (cached != null) {
      inode.setLocalName(cached.getBytes());
    }
  }
}
/** Resets the name cache and clears the inode map. */
void shutdown() {
  this.nameCache.reset();
  this.inodeMap.clear();
}
/**
 * Given an INode get all the path components leading to it from the root.
 * If an Inode corresponding to C is given in /A/B/C, the returned
 * path components will be {root, A, B, C}.
 * Note that this method cannot handle scenarios where the inode is in a
 * snapshot.
 *
 * @param inode the inode to start from; must not be null
 * @return local name bytes of every inode on the parent chain, ordered from
 *         the topmost ancestor down to {@code inode}
 */
public static byte[][] getPathComponents(INode inode) {
  // First pass: count the inode plus all of its ancestors.
  int depth = 1;
  for (INode ancestor = inode.getParent(); ancestor != null;
      ancestor = ancestor.getParent()) {
    depth++;
  }
  // Second pass: fill from the back, avoiding repeated front insertion
  // into a list (which shifts every element on each step).
  final byte[][] components = new byte[depth][];
  INode current = inode;
  for (int i = depth - 1; i >= 0; i--) {
    components[i] = current.getLocalNameBytes();
    current = current.getParent();
  }
  return components;
}
/**
 * @param src the path to inspect
 * @return path components for reserved path, else null.
 */
static byte[][] getPathComponentsForReservedPath(String src) {
  if (isReservedName(src)) {
    return INode.getPathComponents(src);
  }
  return null;
}
/**
 * Check if a given inode name is reserved. Always false when
 * CHECK_RESERVED_FILE_NAMES is off.
 */
public static boolean isReservedName(INode inode) {
  if (!CHECK_RESERVED_FILE_NAMES) {
    return false;
  }
  return Arrays.equals(inode.getLocalNameBytes(), DOT_RESERVED);
}
/**
 * Check if a given path is reserved, i.e. starts with the reserved path
 * prefix followed by a path separator.
 */
public static boolean isReservedName(String src) {
  final String reservedPrefix = DOT_RESERVED_PATH_PREFIX + Path.SEPARATOR;
  return src.startsWith(reservedPrefix);
}
/**
 * Check if a given path starts with the reserved prefix, a path separator
 * and the raw directory name.
 */
static boolean isReservedRawName(String src) {
  final String rawPrefix =
      DOT_RESERVED_PATH_PREFIX + Path.SEPARATOR + RAW_STRING;
  return src.startsWith(rawPrefix);
}
/**
 * Resolve a /.reserved/... path to a non-reserved path.
 * <p>
 * There are two special hierarchies under /.reserved/:
 * <p>
 * {@code /.reserved/.inodes/<inodeid>} performs a path lookup by inodeid,
 * <p>
 * {@code /.reserved/raw/...} returns the encrypted (raw) bytes of a file in
 * an encryption zone. For instance, if /ezone is an encryption zone, then
 * /ezone/a refers to the decrypted file and /.reserved/raw/ezone/a refers to
 * the encrypted (raw) bytes of /ezone/a.
 * <p>
 * Pathnames in the /.reserved/raw directory that resolve to files not in an
 * encryption zone are equivalent to the corresponding non-raw path. Hence,
 * if /a/b/c refers to a file that is not in an encryption zone, then
 * /.reserved/raw/a/b/c is equivalent (they both refer to the same
 * unencrypted file).
 *
 * @param src path that is being processed
 * @param pathComponents path components corresponding to the path
 * @param fsd FSDirectory
 * @return if the path indicates an inode, return path after replacing up to
 *         {@code <inodeid>} with the corresponding path of the inode, else
 *         the path in {@code src} as is. If the path refers to a path in the
 *         "raw" directory, return the non-raw pathname.
 * @throws FileNotFoundException if inodeid is invalid
 */
static String resolvePath(String src, byte[][] pathComponents,
    FSDirectory fsd) throws FileNotFoundException {
  final int count = (pathComponents == null) ? 0 : pathComponents.length;
  // Too short to be, or simply not, a /.reserved/ path: leave untouched.
  if (count <= 2 || !Arrays.equals(DOT_RESERVED, pathComponents[1])) {
    return src;
  }

  final byte[] hierarchy = pathComponents[2];
  if (Arrays.equals(DOT_INODES, hierarchy)) {
    // /.reserved/.inodes needs an inode id component to be resolvable.
    return (count > 3) ? resolveDotInodesPath(src, pathComponents, fsd) : src;
  }
  if (Arrays.equals(RAW, hierarchy)) {
    // Strip off the /.reserved/raw prefix.
    return (count == 3)
        ? Path.SEPARATOR
        : constructRemainingPath("", pathComponents, 3);
  }
  // Some other /.reserved/<unknown> path: ignore it.
  return src;
}
/**
 * Resolves a /.reserved/.inodes/&lt;inodeid&gt;[/...] path. The component at
 * index 3 is parsed as the inode id; a single ".." at index 4 resolves to
 * the parent of that inode.
 *
 * @param src the original path, used in error messages
 * @param pathComponents components of {@code src}
 * @param fsd directory used to look up the inode by id
 * @return the resolved path
 * @throws FileNotFoundException if the id is not a number or no inode with
 *         that id exists
 */
private static String resolveDotInodesPath(String src,
    byte[][] pathComponents, FSDirectory fsd)
    throws FileNotFoundException {
  final String idText = DFSUtil.bytes2String(pathComponents[3]);
  final long id;
  try {
    id = Long.parseLong(idText);
  } catch (NumberFormatException e) {
    throw new FileNotFoundException("Invalid inode path: " + src);
  }

  final int total = pathComponents.length;
  final boolean isRootId = (id == ROOT_INODE_ID);
  if (isRootId && total == 4) {
    return Path.SEPARATOR;
  }

  final INode inode = fsd.getInode(id);
  if (inode == null) {
    throw new FileNotFoundException(
        "File for given inode path does not exist: " + src);
  }

  // Handle single ".." for NFS lookup support.
  if (total > 4 && DFSUtil.bytes2String(pathComponents[4]).equals("..")) {
    final INode parent = inode.getParent();
    // inode is root, or its parent is root.
    final boolean resolvesToRoot =
        parent == null || parent.getId() == ROOT_INODE_ID;
    return resolvesToRoot ? Path.SEPARATOR : parent.getFullPathName();
  }

  final String prefix = isRootId ? "" : inode.getFullPathName();
  return constructRemainingPath(prefix, pathComponents, 4);
}
/**
 * Appends the components from {@code startAt} onwards to the prefix, each
 * preceded by a path separator.
 *
 * @param pathPrefix text the result starts with
 * @param pathComponents all path components
 * @param startAt index of the first component to append
 * @return the assembled path
 */
private static String constructRemainingPath(String pathPrefix,
    byte[][] pathComponents, int startAt) {
  final StringBuilder resolved = new StringBuilder(pathPrefix);
  int idx = startAt;
  while (idx < pathComponents.length) {
    resolved.append(Path.SEPARATOR);
    resolved.append(DFSUtil.bytes2String(pathComponents[idx]));
    idx++;
  }
  if (NameNode.LOG.isDebugEnabled()) {
    NameNode.LOG.debug("Resolved path is " + resolved);
  }
  return resolved.toString();
}
/**
 * For a path ending in the ".snapshot" directory suffix, returns the inode
 * of the directory that precedes the suffix, provided it is a snapshottable
 * directory.
 *
 * @param src path that must end with SEPARATOR_DOT_SNAPSHOT_DIR
 * @return the snapshottable directory inode, or null if the inode is
 *         missing, not a directory, or not snapshottable
 */
INode getINode4DotSnapshot(String src) throws UnresolvedLinkException {
  Preconditions.checkArgument(
      src.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR),
      "%s does not end with %s", src, HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR);

  final int prefixLength =
      src.length() - HdfsConstants.DOT_SNAPSHOT_DIR.length();
  final String dirPath = normalizePath(src.substring(0, prefixLength));

  final INode candidate = this.getINode(dirPath);
  final boolean snapshottableDir = candidate != null
      && candidate.isDirectory()
      && candidate.asDirectory().isSnapshottable();
  return snapshottableDir ? candidate : null;
}
/**
 * Resolves the given components starting at the root directory, with the
 * resolve-link flag set to false.
 *
 * @param components path components to resolve
 * @return the resolved INodesInPath
 */
INodesInPath getExistingPathINodes(byte[][] components)
    throws UnresolvedLinkException {
  final boolean resolveLink = false;
  return INodesInPath.resolve(rootDir, components, resolveLink);
}
/**
 * Get the {@link INodesInPath} for the file / directory for a write
 * operation, with link resolution enabled.
 *
 * @throws SnapshotAccessControlException if path is in RO snapshot
 */
public INodesInPath getINodesInPath4Write(String src)
    throws UnresolvedLinkException, SnapshotAccessControlException {
  final boolean resolveLink = true;
  return getINodesInPath4Write(src, resolveLink);
}
/**
 * Get {@link INode} associated with the file / directory for a write
 * operation, with link resolution enabled.
 *
 * @throws SnapshotAccessControlException if path is in RO snapshot
 */
public INode getINode4Write(String src) throws UnresolvedLinkException,
    SnapshotAccessControlException {
  final INodesInPath resolved = getINodesInPath4Write(src, true);
  return resolved.getLastINode();
}
/**
 * Splits the path into components and resolves them from the root.
 *
 * @return the {@link INodesInPath} containing all inodes in the path.
 */
public INodesInPath getINodesInPath(String path, boolean resolveLink)
    throws UnresolvedLinkException {
  return INodesInPath.resolve(
      rootDir, INode.getPathComponents(path), resolveLink);
}
/** @return the last inode in the path. */
INode getINode(String path, boolean resolveLink)
    throws UnresolvedLinkException {
  final INodesInPath resolved = getINodesInPath(path, resolveLink);
  return resolved.getLastINode();
}
/**
 * Get {@link INode} associated with the file / directory, with link
 * resolution enabled.
 */
public INode getINode(String src) throws UnresolvedLinkException {
  final boolean resolveLink = true;
  return getINode(src, resolveLink);
}
/**
 * Resolves {@code src} for a write operation and rejects snapshot paths.
 *
 * @return the INodesInPath of the components in src
 * @throws UnresolvedLinkException if symlink can't be resolved
 * @throws SnapshotAccessControlException if path is in RO snapshot
 */
INodesInPath getINodesInPath4Write(String src, boolean resolveLink)
    throws UnresolvedLinkException, SnapshotAccessControlException {
  final INodesInPath resolved = INodesInPath.resolve(
      rootDir, INode.getPathComponents(src), resolveLink);
  if (!resolved.isSnapshot()) {
    return resolved;
  }
  throw new SnapshotAccessControlException(
      "Modification on a read-only snapshot is disallowed");
}
/**
 * Builds a permission checker for the remote user of the current call.
 *
 * @return a new permission checker
 * @throws AccessControlException wrapping any IOException raised while
 *         building the checker
 */
FSPermissionChecker getPermissionChecker()
    throws AccessControlException {
  try {
    final UserGroupInformation caller = NameNode.getRemoteUser();
    return getPermissionChecker(fsOwnerShortUserName, supergroup, caller);
  } catch (IOException e) {
    throw new AccessControlException(e);
  }
}
/**
 * Creates a permission checker for the given identities. The configured
 * attribute provider is used when one is set; otherwise the default
 * provider is used.
 */
@VisibleForTesting
FSPermissionChecker getPermissionChecker(String fsOwner, String superGroup,
    UserGroupInformation ugi) throws AccessControlException {
  return new FSPermissionChecker(
      fsOwner, superGroup, ugi,
      attributeProvider != null
          ? attributeProvider
          : DefaultINodeAttributesProvider.DEFAULT_PROVIDER);
}
/**
 * Runs a permission check on the path with only the owner check enabled.
 */
void checkOwner(FSPermissionChecker pc, INodesInPath iip)
    throws AccessControlException {
  final boolean doCheckOwner = true;
  checkPermission(pc, iip, doCheckOwner, null, null, null, null);
}
/**
 * Owner check for a {@code FlatINodesInPath}.
 * NOTE: not implemented yet - the body is empty, so no check is performed
 * and nothing is thrown.
 */
void checkOwner(FSPermissionChecker pc, FlatINodesInPath iip)
throws AccessControlException {
// TODO: implement the owner check for FlatINodesInPath.
}
/**
 * Runs a permission check that requires {@code access} on the path itself.
 */
void checkPathAccess(FSPermissionChecker pc, INodesInPath iip,
    FsAction access) throws AccessControlException {
  final boolean doCheckOwner = false;
  checkPermission(pc, iip, doCheckOwner, null, null, access, null);
}
/**
 * Runs a permission check that passes {@code access} as the parent access
 * requirement.
 */
void checkParentAccess(FSPermissionChecker pc, INodesInPath iip,
    FsAction access) throws AccessControlException {
  final boolean doCheckOwner = false;
  checkPermission(pc, iip, doCheckOwner, null, access, null, null);
}
/**
 * Runs a permission check that passes {@code access} as the ancestor access
 * requirement.
 */
void checkAncestorAccess(FSPermissionChecker pc, INodesInPath iip,
    FsAction access) throws AccessControlException {
  final boolean doCheckOwner = false;
  checkPermission(pc, iip, doCheckOwner, access, null, null, null);
}
/**
 * Runs a permission check on the path with no owner check and no access
 * requirements.
 */
void checkTraverse(FSPermissionChecker pc, INodesInPath iip)
    throws AccessControlException {
  final boolean doCheckOwner = false;
  checkPermission(pc, iip, doCheckOwner, null, null, null, null);
}
/**
 * Path access check for a {@code FlatINodesInPath}.
 * NOTE: not implemented yet - the body is empty, so no check is performed
 * and nothing is thrown.
 */
void checkPathAccess(FSPermissionChecker pc, FlatINodesInPath iip,
FsAction access) throws AccessControlException {
// TODO: implement the path access check for FlatINodesInPath.
}
/**
 * Traverse check for a {@code Resolver.Result}.
 * NOTE: not implemented yet - the body is empty, so no check is performed
 * and nothing is thrown.
 */
void checkTraverse(FSPermissionChecker pc, Resolver.Result iip)
throws AccessControlException {
// TODO: implement the traverse check for Resolver.Result.
}
/**
 * Ancestor access check for a {@code Resolver.Result}.
 * NOTE: not implemented yet - the body is empty, so no check is performed
 * and nothing is thrown.
 */
void checkAncestorAccess(FSPermissionChecker pc, Resolver.Result iip,
FsAction access) throws AccessControlException {
// TODO: implement the ancestor access check for Resolver.Result.
}
/**
 * Check whether current user have permissions to access the path, with the
 * ignore-empty-dir flag set to false. For more details of the parameters,
 * see {@link FSPermissionChecker#checkPermission}.
 */
void checkPermission(FSPermissionChecker pc, INodesInPath iip,
    boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess,
    FsAction access, FsAction subAccess)
    throws AccessControlException {
  final boolean ignoreEmptyDir = false;
  checkPermission(pc, iip, doCheckOwner, ancestorAccess,
      parentAccess, access, subAccess, ignoreEmptyDir);
}
/**
 * Check whether current user have permissions to access the path. For more
 * details of the parameters, see
 * {@link FSPermissionChecker#checkPermission}.
 * Nothing is checked when the checker reports a super user; otherwise the
 * check runs under the read lock.
 */
void checkPermission(FSPermissionChecker pc, INodesInPath iip,
    boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess,
    FsAction access, FsAction subAccess, boolean ignoreEmptyDir)
    throws AccessControlException {
  if (pc.isSuperUser()) {
    return;
  }
  readLock();
  try {
    pc.checkPermission(iip, doCheckOwner, ancestorAccess,
        parentAccess, access, subAccess, ignoreEmptyDir);
  } finally {
    readUnlock();
  }
}
/**
 * Check for the "unreadable by superuser" XAttr on a
 * {@code FlatINodesInPath}.
 * NOTE: not implemented yet - the body contains only commented-out code, so
 * no check is performed and nothing is thrown.
 */
void checkUnreadableBySuperuser(
FSPermissionChecker pc, FlatINodesInPath iip)
throws IOException {
// TODO: port the check below to FlatINodesInPath. The commented-out code
// is the intended logic, kept for reference.
// if (pc.isSuperUser()) {
// for (XAttr xattr : FSDirXAttrOp.getXAttrs(this, inode, snapshotId)) {
// if (XAttrHelper.getPrefixName(xattr).
// equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) {
// throw new AccessControlException(
// "Access is denied for " + pc.getUser() + " since the superuser "
// + "is not allowed to perform this operation.");
// }
// }
// }
}
/**
 * Permission check for a {@code FlatINodesInPath}.
 * NOTE: not implemented yet - the body is empty, so no check is performed
 * and nothing is thrown.
 */
void checkPermission(FSPermissionChecker pc, FlatINodesInPath iip,
boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess,
FsAction access, FsAction subAccess, boolean ignoreEmptyDir)
throws AccessControlException {
// TODO: implement the permission check for FlatINodesInPath.
}
/**
 * Fetches the file status used for audit logging.
 *
 * @param iip the resolved path
 * @return the file info for the path when auditing is enabled and the call
 *         is an external invocation; null otherwise
 */
HdfsFileStatus getAuditFileInfo(INodesInPath iip)
    throws IOException {
  if (namesystem.isAuditEnabled() && namesystem.isExternalInvocation()) {
    return FSDirStatAndListingOp.getFileInfo(this, iip.getPath(), iip, false,
        false);
  }
  return null;
}
/**
 * Audit file info for a {@code FlatINodesInPath}.
 * NOTE: not implemented yet - always returns null.
 */
public HdfsFileStatus getAuditFileInfo(FlatINodesInPath iip) {
// TODO: implement for FlatINodesInPath.
return null;
}
/**
 * Verify that parent directory of src exists.
 * Nothing is checked when {@code src} has no parent path. A parent inode
 * that is a symlink is accepted as well as a directory.
 *
 * @param iip resolved path for {@code src}
 * @param src the path whose parent is verified
 * @throws FileNotFoundException if the parent inode does not exist
 * @throws ParentNotDirectoryException if the parent inode is neither a
 *         directory nor a symlink
 */
void verifyParentDir(INodesInPath iip, String src)
    throws FileNotFoundException, ParentNotDirectoryException {
  final Path parent = new Path(src).getParent();
  if (parent == null) {
    return;
  }
  final INode parentNode = iip.getINode(-2);
  if (parentNode == null) {
    throw new FileNotFoundException("Parent directory doesn't exist: "
        + parent);
  }
  if (!(parentNode.isDirectory() || parentNode.isSymlink())) {
    throw new ParentNotDirectoryException("Parent path is not a directory: "
        + parent);
  }
}
/**
 * Allocate a new inode ID.
 *
 * @return the next value of the inode id generator
 */
long allocateNewInodeId() {
  final long next = inodeId.nextValue();
  return next;
}
/** @return the last inode ID, i.e. the generator's current value. */
public long getLastInodeId() {
  final long current = inodeId.getCurrentValue();
  return current;
}
/**
 * Set the last allocated inode id when fsimage or editlog is loaded.
 *
 * @param newValue the value the id generator skips to
 * @throws IOException wrapping the IllegalStateException raised when the
 *         generator rejects the value
 */
void resetLastInodeId(long newValue) throws IOException {
  try {
    inodeId.skipTo(newValue);
  } catch (IllegalStateException rejected) {
    throw new IOException(rejected);
  }
}
/**
 * Should only be used for tests to reset to any value.
 *
 * @param newValue value the id generator is set to
 */
void resetLastInodeIdWithoutChecking(long newValue) {
  this.inodeId.setCurrentValue(newValue);
}
/**
 * Returns the attributes of {@code node} for the given snapshot, routed
 * through the attribute provider when one is configured.
 *
 * @param fullPath path the child name is appended to
 * @param path local name bytes of the node, appended to {@code fullPath}
 *             (with a separator when needed) for the provider lookup
 * @param node the inode whose attributes are requested
 * @param snapshot snapshot id passed to {@code getSnapshotINode}
 * @return the snapshot attributes, or the provider's view of them
 */
INodeAttributes getAttributes(String fullPath, byte[] path,
    INode node, int snapshot) {
  final INodeAttributes snapshotAttrs = node.getSnapshotINode(snapshot);
  if (attributeProvider == null) {
    return snapshotAttrs;
  }
  final String joiner = fullPath.endsWith(Path.SEPARATOR)
      ? "" : Path.SEPARATOR;
  final String childPath = fullPath + joiner + DFSUtil.bytes2String(path);
  return attributeProvider.getAttributes(childPath, snapshotAttrs);
}
}