blob: 908ab0c44c01e592e16aad5518dac4be7de1e972 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import java.io.IOException;
/**
* This test serves as a prototype to demo the idea proposed so far. It creates
* two files using the same data, one in replicated mode, the other in striped
* layout. For simplicity, it assumes 6 data blocks in both files and that the
* block sizes are the same.
*/
public class TestFileChecksum {
private static final Logger LOG =
    LoggerFactory.getLogger(TestFileChecksum.class);

// Erasure-coding geometry, taken from the shared striped-file test utility.
private final int dataBlocks = StripedFileTestUtil.NUM_DATA_BLOCKS;
private final int parityBlocks = StripedFileTestUtil.NUM_PARITY_BLOCKS;
private final int cellSize = StripedFileTestUtil.BLOCK_STRIPED_CELL_SIZE;

// Sizes derived from the geometry. Declaration order matters here: every
// initializer only reads fields that are declared above it.
private final int stripesPerBlock = 6;
private final int blockSize = cellSize * stripesPerBlock;
private final int numBlockGroups = 10;
private final int stripSize = cellSize * dataBlocks;
private final int blockGroupSize = stripesPerBlock * stripSize;
private final int fileSize = numBlockGroups * blockGroupSize;

// Paths of the files written by the tests.
private final String ecDir = "/striped";
private final String stripedFile1 = ecDir + "/stripedFileChecksum1";
private final String stripedFile2 = ecDir + "/stripedFileChecksum2";
private final String replicatedFile = "/replicatedFileChecksum";

// Per-test state, assigned in setup().
private MiniDFSCluster cluster;
private DistributedFileSystem fs;
private Configuration conf;
private DFSClient client;
private int bytesPerCRC;
/**
 * Starts a mini cluster, creates the erasure-coded directory and caches the
 * file system, client and bytes-per-checksum setting used by the tests.
 *
 * @throws IOException if the cluster or the directory cannot be set up
 */
@Before
public void setup() throws IOException {
  conf = new Configuration();
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.setBoolean(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0);

  // One datanode per data/parity block plus two spare ones.
  final int dataNodeCount = dataBlocks + parityBlocks + 2;
  MiniDFSCluster.Builder clusterBuilder = new MiniDFSCluster.Builder(conf);
  cluster = clusterBuilder.numDataNodes(dataNodeCount).build();

  final Path ecRoot = new Path(ecDir);
  cluster.getFileSystem().mkdir(ecRoot, FsPermission.getDirDefault());
  // NOTE(review): a null policy presumably selects the default erasure
  // coding policy -- confirm against DFSClient#setErasureCodingPolicy.
  cluster.getFileSystem().getClient().setErasureCodingPolicy(ecDir, null);

  fs = cluster.getFileSystem();
  client = fs.getClient();

  bytesPerCRC = conf.getInt(
      HdfsClientConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY,
      HdfsClientConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT);
}
/**
 * Shuts the mini cluster down, if one was started, and drops the reference.
 */
@After
public void tearDown() {
  if (cluster == null) {
    return;
  }
  cluster.shutdown();
  cluster = null;
}
/**
 * Compares two identical striped files using a range of 0 bytes, and a
 * second range of 10 bytes.
 */
@Test(timeout = 90000)
public void testStripedFileChecksum1() throws Exception {
  prepareTestFiles(fileSize, new String[] {stripedFile1, stripedFile2});
  final int firstRange = 0;
  testStripedFileChecksum(firstRange, firstRange + 10);
}
/**
 * Compares two identical striped files using a range one byte short of a
 * full stripe, and a second range 10 bytes shorter than that.
 */
@Test(timeout = 90000)
public void testStripedFileChecksum2() throws Exception {
  prepareTestFiles(fileSize, new String[] {stripedFile1, stripedFile2});
  final int firstRange = stripSize - 1;
  testStripedFileChecksum(firstRange, firstRange - 10);
}
/**
 * Compares two identical striped files using a range of exactly one stripe,
 * and a second range 10 bytes shorter than that.
 */
@Test(timeout = 90000)
public void testStripedFileChecksum3() throws Exception {
  prepareTestFiles(fileSize, new String[] {stripedFile1, stripedFile2});
  final int firstRange = stripSize;
  testStripedFileChecksum(firstRange, firstRange - 10);
}
/**
 * Compares two identical striped files using a range of one stripe plus two
 * cells, and a second range 10 bytes shorter than that.
 */
@Test(timeout = 90000)
public void testStripedFileChecksum4() throws Exception {
  prepareTestFiles(fileSize, new String[] {stripedFile1, stripedFile2});
  final int firstRange = stripSize + cellSize * 2;
  testStripedFileChecksum(firstRange, firstRange - 10);
}
/**
 * Compares two identical striped files using a range of exactly one block
 * group, and a second range 10 bytes shorter than that.
 */
@Test(timeout = 90000)
public void testStripedFileChecksum5() throws Exception {
  prepareTestFiles(fileSize, new String[] {stripedFile1, stripedFile2});
  final int firstRange = blockGroupSize;
  testStripedFileChecksum(firstRange, firstRange - 10);
}
/**
 * Compares two identical striped files using a range of one block group
 * plus one block, and a second range 10 bytes shorter than that.
 */
@Test(timeout = 90000)
public void testStripedFileChecksum6() throws Exception {
  prepareTestFiles(fileSize, new String[] {stripedFile1, stripedFile2});
  final int firstRange = blockGroupSize + blockSize;
  testStripedFileChecksum(firstRange, firstRange - 10);
}
/**
 * Compares the whole-file checksums of two identical striped files. The
 * first range is -1, which getFileChecksum treats as "no range given".
 */
@Test(timeout = 90000)
public void testStripedFileChecksum7() throws Exception {
  prepareTestFiles(fileSize, new String[] {stripedFile1, stripedFile2});
  final int wholeFile = -1;
  testStripedFileChecksum(wholeFile, fileSize);
}
/**
 * Checks that both striped files yield the same checksum for {@code range1},
 * and, when {@code range1} is non-negative and differs from {@code range2},
 * that the checksum of the second file for {@code range2} is different.
 *
 * @param range1 range used for both files; negative means the whole file
 * @param range2 second range, applied to the second file only
 * @throws Exception if a checksum cannot be obtained
 */
private void testStripedFileChecksum(int range1, int range2)
    throws Exception {
  FileChecksum file1AtRange1 = getFileChecksum(stripedFile1, range1, false);
  FileChecksum file2AtRange1 = getFileChecksum(stripedFile2, range1, false);
  FileChecksum file2AtRange2 = getFileChecksum(stripedFile2, range2, false);

  LOG.info("stripedFileChecksum1:" + file1AtRange1);
  LOG.info("stripedFileChecksum2:" + file2AtRange1);
  LOG.info("stripedFileChecksum3:" + file2AtRange2);

  Assert.assertTrue(file1AtRange1.equals(file2AtRange1));

  // The "different range" comparison is skipped for whole-file requests and
  // when both ranges are the same.
  boolean skipRangeComparison = range1 < 0 || range1 == range2;
  if (!skipRangeComparison) {
    Assert.assertFalse(file1AtRange1.equals(file2AtRange2));
  }
}
/**
 * Writes the same data to a striped and to a replicated file and expects the
 * checksums of their first 10 bytes to differ.
 */
@Test(timeout = 90000)
public void testStripedAndReplicatedFileChecksum() throws Exception {
  final int range = 10;
  prepareTestFiles(fileSize, new String[] {stripedFile1, replicatedFile});

  FileChecksum stripedChecksum = getFileChecksum(stripedFile1, range, false);
  FileChecksum replicatedChecksum =
      getFileChecksum(replicatedFile, range, false);

  Assert.assertFalse(stripedChecksum.equals(replicatedChecksum));
}
/**
 * Computes the checksum over {@code fileSize} bytes of a striped file twice,
 * the second time with one of its datanodes shut down, and expects both
 * results to be equal.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocks1() throws Exception {
  prepareTestFiles(fileSize, new String[] {stripedFile1});

  FileChecksum healthy = getFileChecksum(stripedFile1, fileSize, false);
  FileChecksum withDeadDn = getFileChecksum(stripedFile1, fileSize, true);

  LOG.info("stripedFileChecksum1:" + healthy);
  LOG.info("stripedFileChecksumRecon:" + withDeadDn);

  boolean same = healthy.equals(withDeadDn);
  Assert.assertTrue("Checksum mismatches!", same);
}
/**
 * Computes whole-file checksums for two identical striped files, plus one
 * more for the second file with one of its datanodes shut down, and expects
 * all three to be equal.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocks2() throws Exception {
  prepareTestFiles(fileSize, new String[] {stripedFile1, stripedFile2});
  FileChecksum stripedFileChecksum1 = getFileChecksum(stripedFile1, -1,
      false);
  FileChecksum stripedFileChecksum2 = getFileChecksum(stripedFile2, -1,
      false);
  FileChecksum stripedFileChecksum2Recon = getFileChecksum(stripedFile2, -1,
      true);
  LOG.info("stripedFileChecksum1:" + stripedFileChecksum1);
  // Log the second file's checksum under its own label (the first file's
  // checksum used to be printed here by mistake).
  LOG.info("stripedFileChecksum2:" + stripedFileChecksum2);
  LOG.info("stripedFileChecksum2Recon:" + stripedFileChecksum2Recon);
  Assert.assertTrue("Checksum mismatches!",
      stripedFileChecksum1.equals(stripedFileChecksum2));
  Assert.assertTrue("Checksum mismatches!",
      stripedFileChecksum1.equals(stripedFileChecksum2Recon));
  Assert.assertTrue("Checksum mismatches!",
      stripedFileChecksum2.equals(stripedFileChecksum2Recon));
}
/**
 * Computes the checksum of a striped file for the requested length twice,
 * the second time with one of the file's datanodes shut down, and expects
 * both results to be equal.
 *
 * <p>If nothing exists at {@code stripedFile} yet, a file of
 * {@code fileSize} bytes is written first. A file the caller has already
 * prepared is left untouched, so that callers which create a smaller file
 * beforehand are checked against that file rather than a replacement.
 *
 * @param stripedFile path of the striped file to check
 * @param requestedLen length passed to the checksum request; a negative
 *     value requests the whole-file checksum
 * @throws Exception if the file cannot be prepared or a checksum cannot be
 *     obtained
 */
private void testStripedFileChecksumWithMissedDataBlocksRangeQuery(
    String stripedFile, int requestedLen) throws Exception {
  LOG.info("Checksum file:{}, requested length:{}", stripedFile,
      requestedLen);
  // Only create the default full-size file when the caller has not prepared
  // one already; writing unconditionally would replace a caller's smaller
  // file and the small-file scenarios would never actually be exercised.
  if (!fs.exists(new Path(stripedFile))) {
    prepareTestFiles(fileSize, new String[] {stripedFile});
  }
  FileChecksum stripedFileChecksum1 = getFileChecksum(stripedFile,
      requestedLen, false);
  FileChecksum stripedFileChecksumRecon = getFileChecksum(stripedFile,
      requestedLen, true);
  LOG.info("stripedFileChecksum1:" + stripedFileChecksum1);
  LOG.info("stripedFileChecksumRecon:" + stripedFileChecksumRecon);
  Assert.assertTrue("Checksum mismatches!",
      stripedFileChecksum1.equals(stripedFileChecksumRecon));
}
/**
 * Test to verify that the checksum can be computed for a requested range of
 * a single byte.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery1()
    throws Exception {
  final int requestedLen = 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range of
 * 10 bytes.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery2()
    throws Exception {
  final int requestedLen = 10;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range of
 * exactly bytesPerCRC bytes. bytesPerCRC is read from the configuration in
 * setup().
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery3()
    throws Exception {
  final int requestedLen = bytesPerCRC;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range of
 * exactly one cell (cellSize bytes).
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery4()
    throws Exception {
  final int requestedLen = cellSize;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range
 * one byte shorter than a cell.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery5()
    throws Exception {
  final int requestedLen = cellSize - 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range
 * one byte longer than a cell.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery6()
    throws Exception {
  final int requestedLen = cellSize + 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range of
 * two cells.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery7()
    throws Exception {
  final int requestedLen = cellSize * 2;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range of
 * exactly one stripe (stripSize bytes).
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery8()
    throws Exception {
  final int requestedLen = stripSize;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range
 * one byte shorter than a stripe.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery9()
    throws Exception {
  final int requestedLen = stripSize - 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range
 * one byte longer than a stripe.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery10()
    throws Exception {
  final int requestedLen = stripSize + 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range
 * one byte shorter than a block group.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery11()
    throws Exception {
  final int requestedLen = blockGroupSize - 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range
 * one byte longer than a block group.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery12()
    throws Exception {
  final int requestedLen = blockGroupSize + 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range
 * spanning half of the block groups (blockGroupSize * numBlockGroups / 2).
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery13()
    throws Exception {
  final int requestedLen = blockGroupSize * numBlockGroups / 2;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range
 * one byte shorter than fileSize.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery14()
    throws Exception {
  final int requestedLen = fileSize - 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Test to verify that the checksum can be computed for a requested range
 * twice as long as fileSize.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery15()
    throws Exception {
  final int requestedLen = fileSize * 2;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      stripedFile1, requestedLen);
}
/**
 * Prepares a 100-byte striped file and then runs the missed-data-block range
 * check on that path with a requested length of 99 bytes.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery16()
    throws Exception {
  final int smallFileLen = 100;
  final String smallStripedFile = ecDir + "/stripedFileChecksum3";
  prepareTestFiles(smallFileLen, new String[] {smallStripedFile});

  final int requestedLen = smallFileLen - 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      smallStripedFile, requestedLen);
}
/**
 * Prepares a 100-byte striped file and then runs the missed-data-block range
 * check on that path with a requested length of a single byte.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery17()
    throws Exception {
  final int smallFileLen = 100;
  final String smallStripedFile = ecDir + "/stripedFileChecksum3";
  prepareTestFiles(smallFileLen, new String[] {smallStripedFile});

  final int requestedLen = 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      smallStripedFile, requestedLen);
}
/**
 * Prepares a 100-byte striped file and then runs the missed-data-block range
 * check on that path with a requested length of 10 bytes.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery18()
    throws Exception {
  final int smallFileLen = 100;
  final String smallStripedFile = ecDir + "/stripedFileChecksum3";
  prepareTestFiles(smallFileLen, new String[] {smallStripedFile});

  final int requestedLen = 10;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      smallStripedFile, requestedLen);
}
/**
 * Prepares a 100-byte striped file and then runs the missed-data-block range
 * check on that path with a requested length of 200 bytes, i.e. twice the
 * prepared length.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery19()
    throws Exception {
  final int smallFileLen = 100;
  final String smallStripedFile = ecDir + "/stripedFileChecksum3";
  prepareTestFiles(smallFileLen, new String[] {smallStripedFile});

  final int requestedLen = smallFileLen * 2;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      smallStripedFile, requestedLen);
}
/**
 * Prepares a striped file of bytesPerCRC bytes and then runs the
 * missed-data-block range check on that path with a requested length one
 * byte shorter than bytesPerCRC.
 */
@Test(timeout = 90000)
public void testStripedFileChecksumWithMissedDataBlocksRangeQuery20()
    throws Exception {
  final int smallFileLen = bytesPerCRC;
  final String smallStripedFile = ecDir + "/stripedFileChecksum3";
  prepareTestFiles(smallFileLen, new String[] {smallStripedFile});

  final int requestedLen = bytesPerCRC - 1;
  testStripedFileChecksumWithMissedDataBlocksRangeQuery(
      smallStripedFile, requestedLen);
}
/**
 * Fetches the checksum of a file, optionally with one of the datanodes that
 * hold its first block shut down while the checksum is computed.
 *
 * @param filePath path of the file to checksum
 * @param range length passed to the ranged checksum call; a negative value
 *     requests the whole-file checksum instead
 * @param killDn whether to shut down the datanode picked by
 *     {@link #getDataNodeToKill(String)} before asking for the checksum; the
 *     datanode is restarted again afterwards
 * @return the checksum reported by the file system
 * @throws Exception if the datanode cannot be stopped or restarted, or the
 *     checksum cannot be obtained
 */
private FileChecksum getFileChecksum(String filePath, int range,
    boolean killDn) throws Exception {
  int dnIdxToDie = -1;
  if (killDn) {
    dnIdxToDie = getDataNodeToKill(filePath);
    // getDataNodeToKill reports -1 when no cluster datanode matches; fail
    // with a readable message instead of an index error from the list.
    Assert.assertTrue("No datanode of the cluster matches the first "
        + "location of " + filePath, dnIdxToDie >= 0);
    DataNode dnToDie = cluster.getDataNodes().get(dnIdxToDie);
    shutdownDataNode(dnToDie);
  }

  Path testPath = new Path(filePath);
  FileChecksum fc;
  if (range >= 0) {
    fc = fs.getFileChecksum(testPath, range);
  } else {
    fc = fs.getFileChecksum(testPath);
  }

  // Bring the stopped datanode back so later calls see a full cluster.
  if (dnIdxToDie != -1) {
    cluster.restartDataNode(dnIdxToDie, true);
  }
  return fc;
}
/**
 * Writes the same generated content to every given path.
 *
 * @param fileLength number of bytes to generate
 * @param filePaths paths of the files to write
 * @throws IOException if a file cannot be written
 */
private void prepareTestFiles(int fileLength, String[] filePaths)
    throws IOException {
  final byte[] content = StripedFileTestUtil.generateBytes(fileLength);
  for (int i = 0; i < filePaths.length; i++) {
    DFSTestUtil.writeFile(fs, new Path(filePaths[i]), content);
  }
}
/**
 * Kills the given datanode and then marks it dead on the cluster.
 *
 * <p>Stopping the process alone is not enough: the namenode has to consider
 * the node dead as well. NOTE(review): setDataNodeDead presumably clears the
 * node's last update time and triggers a heartbeat check on the namenode --
 * confirm against MiniDFSCluster.
 *
 * @param dataNode the datanode to shut down
 * @throws IOException if the node cannot be marked dead
 */
void shutdownDataNode(DataNode dataNode) throws IOException {
  // Stop the datanode itself first ...
  dataNode.shutdown();
  // ... then make the cluster treat it as dead.
  cluster.setDataNodeDead(dataNode.getDatanodeId());
}
/**
 * Picks the datanode to shut down for a file: the first reported location of
 * the file's first located block. For simplicity the first location is used
 * because it is the one tried first.
 *
 * @param filePath path of the file whose block locations are looked up
 * @return index of the matching datanode in the cluster's datanode list, or
 *     -1 if no datanode has the same info port as the chosen location
 * @throws IOException if the block locations cannot be fetched
 */
int getDataNodeToKill(String filePath) throws IOException {
  LocatedBlocks blocks = client.getLocatedBlocks(filePath, 0);
  LocatedBlock firstBlock = blocks.get(0);
  DatanodeInfo[] holders = firstBlock.getLocations();
  // Datanodes are matched by their info port.
  final int targetInfoPort = holders[0].getInfoPort();

  int position = 0;
  for (DataNode candidate : cluster.getDataNodes()) {
    if (candidate.getInfoPort() == targetInfoPort) {
      return position;
    }
    position++;
  }
  return -1;
}
}