blob: 3476e635c5c097125f10d24d8c4a43ad1bc9f0d0 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataOutputStream;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.Text;
public class TestFileInputFormat extends TestCase {
Configuration conf = new Configuration();
MiniDFSCluster dfs = null;
private MiniDFSCluster newDFSCluster(JobConf conf) throws Exception {
return new MiniDFSCluster(conf, 4, true,
new String[]{"/rack0", "/rack0",
"/rack1", "/rack1"},
new String[]{"host0", "host1",
"host2", "host3"});
}
public void testLocality() throws Exception {
JobConf job = new JobConf(conf);
dfs = newDFSCluster(job);
FileSystem fs = dfs.getFileSystem();
System.out.println("FileSystem " + fs.getUri());
Path inputDir = new Path("/foo/");
String fileName = "part-0000";
createInputs(fs, inputDir, fileName);
// split it using a file input format
TextInputFormat.addInputPath(job, inputDir);
TextInputFormat inFormat = new TextInputFormat();
inFormat.configure(job);
InputSplit[] splits = inFormat.getSplits(job, 1);
FileStatus fileStatus = fs.getFileStatus(new Path(inputDir, fileName));
BlockLocation[] locations =
fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
System.out.println("Made splits");
// make sure that each split is a block and the locations match
for(int i=0; i < splits.length; ++i) {
FileSplit fileSplit = (FileSplit) splits[i];
System.out.println("File split: " + fileSplit);
for (String h: fileSplit.getLocations()) {
System.out.println("Location: " + h);
}
System.out.println("Block: " + locations[i]);
assertEquals(locations[i].getOffset(), fileSplit.getStart());
assertEquals(locations[i].getLength(), fileSplit.getLength());
String[] blockLocs = locations[i].getHosts();
String[] splitLocs = fileSplit.getLocations();
assertEquals(2, blockLocs.length);
assertEquals(2, splitLocs.length);
assertTrue((blockLocs[0].equals(splitLocs[0]) &&
blockLocs[1].equals(splitLocs[1])) ||
(blockLocs[1].equals(splitLocs[0]) &&
blockLocs[0].equals(splitLocs[1])));
}
assertEquals("Expected value of " + FileInputFormat.NUM_INPUT_FILES,
1, job.getLong(FileInputFormat.NUM_INPUT_FILES, 0));
}
private void createInputs(FileSystem fs, Path inDir, String fileName)
throws IOException {
// create a multi-block file on hdfs
DataOutputStream out = fs.create(new Path(inDir, fileName), true, 4096,
(short) 2, 512, null);
for(int i=0; i < 1000; ++i) {
out.writeChars("Hello\n");
}
out.close();
System.out.println("Wrote file");
}
public void testNumInputs() throws Exception {
JobConf job = new JobConf(conf);
dfs = newDFSCluster(job);
FileSystem fs = dfs.getFileSystem();
System.out.println("FileSystem " + fs.getUri());
Path inputDir = new Path("/foo/");
final int numFiles = 10;
String fileNameBase = "part-0000";
for (int i=0; i < numFiles; ++i) {
createInputs(fs, inputDir, fileNameBase + String.valueOf(i));
}
createInputs(fs, inputDir, "_meta");
createInputs(fs, inputDir, "_temp");
// split it using a file input format
TextInputFormat.addInputPath(job, inputDir);
TextInputFormat inFormat = new TextInputFormat();
inFormat.configure(job);
InputSplit[] splits = inFormat.getSplits(job, 1);
assertEquals("Expected value of " + FileInputFormat.NUM_INPUT_FILES,
numFiles, job.getLong(FileInputFormat.NUM_INPUT_FILES, 0));
}
final Path root = new Path("/TestFileInputFormat");
final Path file1 = new Path(root, "file1");
final Path dir1 = new Path(root, "dir1");
final Path file2 = new Path(dir1, "file2");
static final int BLOCKSIZE = 1024;
static final byte[] databuf = new byte[BLOCKSIZE];
private static final String rack1[] = new String[] {
"/r1"
};
private static final String hosts1[] = new String[] {
"host1.rack1.com"
};
private class DummyFileInputFormat extends FileInputFormat<Text, Text> {
@Override
public RecordReader<Text, Text> getRecordReader(InputSplit split,
JobConf job, Reporter reporter) throws IOException {
return null;
}
}
public void testMultiLevelInput() throws IOException {
JobConf job = new JobConf(conf);
job.setBoolean("dfs.replication.considerLoad", false);
dfs = new MiniDFSCluster(job, 1, true, rack1, hosts1);
dfs.waitActive();
String namenode = (dfs.getFileSystem()).getUri().getHost() + ":" +
(dfs.getFileSystem()).getUri().getPort();
FileSystem fileSys = dfs.getFileSystem();
if (!fileSys.mkdirs(dir1)) {
throw new IOException("Mkdirs failed to create " + root.toString());
}
writeFile(job, file1, (short)1, 1);
writeFile(job, file2, (short)1, 1);
// split it using a CombinedFile input format
DummyFileInputFormat inFormat = new DummyFileInputFormat();
inFormat.setInputPaths(job, root);
// By default, we don't allow multi-level/recursive inputs
boolean exceptionThrown = false;
try {
InputSplit[] splits = inFormat.getSplits(job, 1);
} catch (Exception e) {
exceptionThrown = true;
}
assertTrue("Exception should be thrown by default for scanning a "
+ "directory with directories inside.", exceptionThrown);
// Enable multi-level/recursive inputs
job.setBoolean("mapred.input.dir.recursive", true);
InputSplit[] splits = inFormat.getSplits(job, 1);
assertEquals(splits.length, 2);
}
static void writeFile(Configuration conf, Path name,
short replication, int numBlocks) throws IOException {
FileSystem fileSys = FileSystem.get(conf);
FSDataOutputStream stm = fileSys.create(name, true,
conf.getInt("io.file.buffer.size", 4096),
replication, (long)BLOCKSIZE);
for (int i = 0; i < numBlocks; i++) {
stm.write(databuf);
}
stm.close();
DFSTestUtil.waitReplication(fileSys, name, replication);
}
@Override
public void tearDown() throws Exception {
if (dfs != null) {
dfs.shutdown();
dfs = null;
}
}
}