blob: bc1a279abef055c466795564f7a305a1b70b5bd5 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataOutputStream;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
public class TestFileInputFormat extends TestCase {
Configuration conf = new Configuration();
MiniDFSCluster dfs = null;
public void setUp() throws Exception {
dfs = new MiniDFSCluster(conf, 4, true,
new String[]{"/rack0", "/rack0",
"/rack1", "/rack1"},
new String[]{"host0", "host1",
"host2", "host3"});
}
public void testLocality() throws Exception {
JobConf job = new JobConf(conf);
FileSystem fs = dfs.getFileSystem();
System.out.println("FileSystem " + fs.getUri());
Path inputDir = new Path("/foo/");
String fileName = "part-0000";
createInputs(fs, inputDir, fileName);
// split it using a file input format
TextInputFormat.addInputPath(job, inputDir);
TextInputFormat inFormat = new TextInputFormat();
inFormat.configure(job);
InputSplit[] splits = inFormat.getSplits(job, 1);
FileStatus fileStatus = fs.getFileStatus(new Path(inputDir, fileName));
BlockLocation[] locations =
fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
System.out.println("Made splits");
// make sure that each split is a block and the locations match
for(int i=0; i < splits.length; ++i) {
FileSplit fileSplit = (FileSplit) splits[i];
System.out.println("File split: " + fileSplit);
for (String h: fileSplit.getLocations()) {
System.out.println("Location: " + h);
}
System.out.println("Block: " + locations[i]);
assertEquals(locations[i].getOffset(), fileSplit.getStart());
assertEquals(locations[i].getLength(), fileSplit.getLength());
String[] blockLocs = locations[i].getHosts();
String[] splitLocs = fileSplit.getLocations();
assertEquals(2, blockLocs.length);
assertEquals(2, splitLocs.length);
assertTrue((blockLocs[0].equals(splitLocs[0]) &&
blockLocs[1].equals(splitLocs[1])) ||
(blockLocs[1].equals(splitLocs[0]) &&
blockLocs[0].equals(splitLocs[1])));
}
assertEquals("Expected value of " + FileInputFormat.NUM_INPUT_FILES,
1, job.getLong(FileInputFormat.NUM_INPUT_FILES, 0));
}
private void createInputs(FileSystem fs, Path inDir, String fileName)
throws IOException {
// create a multi-block file on hdfs
DataOutputStream out = fs.create(new Path(inDir, fileName), true, 4096,
(short) 2, 512, null);
for(int i=0; i < 1000; ++i) {
out.writeChars("Hello\n");
}
out.close();
System.out.println("Wrote file");
}
public void testNumInputs() throws Exception {
JobConf job = new JobConf(conf);
FileSystem fs = dfs.getFileSystem();
System.out.println("FileSystem " + fs.getUri());
Path inputDir = new Path("/foo/");
final int numFiles = 10;
String fileNameBase = "part-0000";
for (int i=0; i < numFiles; ++i) {
createInputs(fs, inputDir, fileNameBase + String.valueOf(i));
}
createInputs(fs, inputDir, "_meta");
createInputs(fs, inputDir, "_temp");
// split it using a file input format
TextInputFormat.addInputPath(job, inputDir);
TextInputFormat inFormat = new TextInputFormat();
inFormat.configure(job);
InputSplit[] splits = inFormat.getSplits(job, 1);
assertEquals("Expected value of " + FileInputFormat.NUM_INPUT_FILES,
numFiles, job.getLong(FileInputFormat.NUM_INPUT_FILES, 0));
}
public void tearDown() throws Exception {
if (dfs != null) {
dfs.shutdown();
}
}
}