blob: 9a422427dbf2c5763bb4fdfa81abeed9b424547f [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.LightWeightResizableGSet;
import com.google.common.annotations.VisibleForTesting;
/**
* This class is used by datanodes to maintain meta data of its replicas.
* It provides a general interface for meta information of a replica.
*/
@InterfaceAudience.Private
abstract public class ReplicaInfo extends Block
implements Replica, LightWeightResizableGSet.LinkedElement {
/** For implementing {@link LightWeightResizableGSet.LinkedElement} interface */
private LightWeightResizableGSet.LinkedElement next;
/** volume where the replica belongs */
private FsVolumeSpi volume;
/** directory where block & meta files belong */
/**
* Base directory containing numerically-identified sub directories and
* possibly blocks.
*/
private File baseDir;
/**
* Whether or not this replica's parent directory includes subdirs, in which
* case we can generate them based on the replica's block ID
*/
private boolean hasSubdirs;
private static final Map<String, File> internedBaseDirs = new HashMap<String, File>();
/** This is used by some tests and FsDatasetUtil#computeChecksum. */
private static final FileIoProvider DEFAULT_FILE_IO_PROVIDER =
new FileIoProvider(null, null);
/**
* Constructor
* @param block a block
* @param vol volume where replica is located
* @param dir directory path where block and meta files are located
*/
ReplicaInfo(Block block, FsVolumeSpi vol, File dir) {
this(block.getBlockId(), block.getNumBytes(),
block.getGenerationStamp(), vol, dir);
}
/**
* Constructor
* @param blockId block id
* @param len replica length
* @param genStamp replica generation stamp
* @param vol volume where replica is located
* @param dir directory path where block and meta files are located
*/
ReplicaInfo(long blockId, long len, long genStamp,
FsVolumeSpi vol, File dir) {
super(blockId, len, genStamp);
this.volume = vol;
setDirInternal(dir);
}
/**
* Copy constructor.
* @param from where to copy from
*/
ReplicaInfo(ReplicaInfo from) {
this(from, from.getVolume(), from.getDir());
}
/**
* Get the full path of this replica's data file
* @return the full path of this replica's data file
*/
public File getBlockFile() {
return new File(getDir(), getBlockName());
}
/**
* Get the full path of this replica's meta file
* @return the full path of this replica's meta file
*/
public File getMetaFile() {
return new File(getDir(),
DatanodeUtil.getMetaName(getBlockName(), getGenerationStamp()));
}
/**
* Get the volume where this replica is located on disk
* @return the volume where this replica is located on disk
*/
public FsVolumeSpi getVolume() {
return volume;
}
/**
* Get the {@link FileIoProvider} for disk IO operations.
*/
public FileIoProvider getFileIoProvider() {
// In tests and when invoked via FsDatasetUtil#computeChecksum, the
// target volume for this replica may be unknown and hence null.
// Use the DEFAULT_FILE_IO_PROVIDER with no-op hooks.
return (volume != null) ? volume.getFileIoProvider()
: DEFAULT_FILE_IO_PROVIDER;
}
/**
* Set the volume where this replica is located on disk
*/
void setVolume(FsVolumeSpi vol) {
this.volume = vol;
}
/**
* Get the storageUuid of the volume that stores this replica.
*/
@Override
public String getStorageUuid() {
return volume.getStorageID();
}
/**
* Return the parent directory path where this replica is located
* @return the parent directory path where this replica is located
*/
File getDir() {
return hasSubdirs ? DatanodeUtil.idToBlockDir(baseDir,
getBlockId()) : baseDir;
}
/**
* Set the parent directory where this replica is located
* @param dir the parent directory where the replica is located
*/
public void setDir(File dir) {
setDirInternal(dir);
}
private void setDirInternal(File dir) {
if (dir == null) {
baseDir = null;
return;
}
ReplicaDirInfo dirInfo = parseBaseDir(dir, getBlockId());
this.hasSubdirs = dirInfo.hasSubidrs;
synchronized (internedBaseDirs) {
if (!internedBaseDirs.containsKey(dirInfo.baseDirPath)) {
// Create a new String path of this file and make a brand new File object
// to guarantee we drop the reference to the underlying char[] storage.
File baseDir = new File(dirInfo.baseDirPath);
internedBaseDirs.put(dirInfo.baseDirPath, baseDir);
}
this.baseDir = internedBaseDirs.get(dirInfo.baseDirPath);
}
}
@VisibleForTesting
public static class ReplicaDirInfo {
public String baseDirPath;
public boolean hasSubidrs;
public ReplicaDirInfo (String baseDirPath, boolean hasSubidrs) {
this.baseDirPath = baseDirPath;
this.hasSubidrs = hasSubidrs;
}
}
@VisibleForTesting
public static ReplicaDirInfo parseBaseDir(File dir, long blockId) {
File currentDir = dir;
boolean hasSubdirs = false;
while (currentDir.getName().startsWith(DataStorage.BLOCK_SUBDIR_PREFIX)) {
hasSubdirs = true;
currentDir = currentDir.getParentFile();
}
if (hasSubdirs) {
// set baseDir to currentDir if it matches id(idToBlockDir).
File idToBlockDir = DatanodeUtil.idToBlockDir(currentDir, blockId);
if (idToBlockDir.equals(dir)) {
return new ReplicaDirInfo(currentDir.getAbsolutePath(), true);
}
}
return new ReplicaDirInfo(dir.getAbsolutePath(), false);
}
/**
* Number of bytes reserved for this replica on disk.
*/
public long getBytesReserved() {
return 0;
}
/**
* Number of bytes originally reserved for this replica. The actual
* reservation is adjusted as data is written to disk.
*
* @return the number of bytes originally reserved for this replica.
*/
public long getOriginalBytesReserved() {
return 0;
}
/**
* Copy specified file into a temporary file. Then rename the
* temporary file to the original name. This will cause any
* hardlinks to the original file to be removed. The temporary
* files are created in the same directory. The temporary files will
* be recovered (especially on Windows) on datanode restart.
*/
private void breakHardlinks(File file, Block b) throws IOException {
final FileIoProvider fileIoProvider = getFileIoProvider();
final File tmpFile = DatanodeUtil.createFileWithExistsCheck(
getVolume(), b, DatanodeUtil.getUnlinkTmpFile(file), fileIoProvider);
try {
try (FileInputStream in = fileIoProvider.getFileInputStream(
getVolume(), file)) {
try (FileOutputStream out = fileIoProvider.getFileOutputStream(
getVolume(), tmpFile)) {
IOUtils.copyBytes(in, out, 16 * 1024);
}
if (file.length() != tmpFile.length()) {
throw new IOException("Copy of file " + file + " size "
+ file.length() + " into file " + tmpFile
+ " resulted in a size of " + tmpFile.length());
}
}
fileIoProvider.replaceFile(getVolume(), tmpFile, file);
} catch (IOException e) {
DataNode.LOG.error("Cannot breakHardlinks for file " + file, e);
if (!fileIoProvider.delete(getVolume(), tmpFile)) {
DataNode.LOG.info("detachFile failed to delete temporary file " +
tmpFile);
}
throw e;
}
}
/**
* This function "breaks hardlinks" to the current replica file.
*
* When doing a DataNode upgrade, we create a bunch of hardlinks to each block
* file. This cleverly ensures that both the old and the new storage
* directories can contain the same block file, without using additional space
* for the data.
*
* However, when we want to append to the replica file, we need to "break" the
* hardlink to ensure that the old snapshot continues to contain the old data
* length. If we failed to do that, we could roll back to the previous/
* directory during a downgrade, and find that the block contents were longer
* than they were at the time of upgrade.
*
* @return true only if data was copied.
* @throws IOException
*/
public boolean breakHardLinksIfNeeded() throws IOException {
File file = getBlockFile();
if (file == null || getVolume() == null) {
throw new IOException("detachBlock:Block not found. " + this);
}
File meta = getMetaFile();
int linkCount = getHardLinkCount(file);
if (linkCount > 1) {
DataNode.LOG.info("Breaking hardlink for " + linkCount + "x-linked " +
"block " + this);
breakHardlinks(file, this);
}
if (getHardLinkCount(meta) > 1) {
breakHardlinks(meta, this);
}
return true;
}
@Override //Object
public String toString() {
return getClass().getSimpleName()
+ ", " + super.toString()
+ ", " + getState()
+ "\n getNumBytes() = " + getNumBytes()
+ "\n getBytesOnDisk() = " + getBytesOnDisk()
+ "\n getVisibleLength()= " + getVisibleLength()
+ "\n getVolume() = " + getVolume()
+ "\n getBlockFile() = " + getBlockFile();
}
@Override
public boolean isOnTransientStorage() {
return volume.isTransientStorage();
}
@Override
public LightWeightResizableGSet.LinkedElement getNext() {
return next;
}
@Override
public void setNext(LightWeightResizableGSet.LinkedElement next) {
this.next = next;
}
int getHardLinkCount(File fileName) throws IOException {
return getFileIoProvider().getHardLinkCount(getVolume(), fileName);
}
}