| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.mapred; |
| |
| import java.io.BufferedReader; |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.OutputStream; |
| import java.io.OutputStreamWriter; |
| import java.io.Writer; |
| |
| import java.net.URI; |
| |
| import org.apache.hadoop.fs.FileUtil; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.io.LongWritable; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapred.lib.IdentityMapper; |
| import org.apache.hadoop.mapred.lib.IdentityReducer; |
| |
| import org.apache.hadoop.filecache.DistributedCache; |
| |
| public class TestDuplicateArchiveFileCachedURLMinicluster extends ClusterMapReduceTestCase { |
| |
| enum EnumCounter { MAP_RECORDS } |
| |
| public void testDuplicationsMinicluster() throws Exception { |
| OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt")); |
| Writer wr = new OutputStreamWriter(os); |
| wr.write("hello1\n"); |
| wr.write("hello2\n"); |
| wr.write("hello3\n"); |
| wr.write("hello4\n"); |
| wr.close(); |
| |
| JobConf conf = createJobConf(); |
| conf.setJobName("counters"); |
| |
| conf.setInputFormat(TextInputFormat.class); |
| |
| conf.setMapOutputKeyClass(LongWritable.class); |
| conf.setMapOutputValueClass(Text.class); |
| |
| conf.setOutputFormat(TextOutputFormat.class); |
| conf.setOutputKeyClass(LongWritable.class); |
| conf.setOutputValueClass(Text.class); |
| |
| conf.setMapperClass(IdentityMapper.class); |
| conf.setReducerClass(IdentityReducer.class); |
| |
| FileInputFormat.setInputPaths(conf, getInputDir()); |
| |
| FileOutputFormat.setOutputPath(conf, getOutputDir()); |
| |
| Path inputRoot = getInputDir().makeQualified(getFileSystem()); |
| Path unqualifiedInputRoot = getInputDir(); |
| System.out.println("The qualified input dir is " + inputRoot.toString()); |
| System.out.println("The unqualified input dir is " + unqualifiedInputRoot.toString()); |
| |
| Path duplicatedPath = new Path(inputRoot, "text.txt"); |
| URI duplicatedURI = duplicatedPath.toUri(); |
| |
| Path unqualifiedDuplicatedPath = new Path(unqualifiedInputRoot, "text.txt"); |
| URI unqualifiedDuplicatedURI = unqualifiedDuplicatedPath.toUri(); |
| |
| System.out.println("The duplicated Path is " + duplicatedPath); |
| System.out.println("The duplicated URI is " + duplicatedURI); |
| System.out.println("The unqualified duplicated URI is " + unqualifiedDuplicatedURI); |
| |
| DistributedCache.addCacheArchive(duplicatedURI, conf); |
| DistributedCache.addCacheFile(unqualifiedDuplicatedURI, conf); |
| |
| try { |
| RunningJob runningJob = JobClient.runJob(conf); |
| |
| assertFalse("The job completed, which is wrong since there's a duplication", true); |
| } catch (InvalidJobConfException e) { |
| System.out.println("We expect to see a stack trace here."); |
| e.printStackTrace(System.out); |
| } |
| } |
| |
| public void testApparentDuplicationsMinicluster() throws Exception { |
| OutputStream os = getFileSystem().create(new Path(getInputDir(), "text2.txt")); |
| Writer wr = new OutputStreamWriter(os); |
| wr.write("hello1\n"); |
| wr.write("hello2\n"); |
| wr.write("hello3\n"); |
| wr.write("hello4\n"); |
| wr.close(); |
| |
| JobConf conf = createJobConf(); |
| conf.setJobName("counters"); |
| |
| conf.setInputFormat(TextInputFormat.class); |
| |
| conf.setMapOutputKeyClass(LongWritable.class); |
| conf.setMapOutputValueClass(Text.class); |
| |
| conf.setOutputFormat(TextOutputFormat.class); |
| conf.setOutputKeyClass(LongWritable.class); |
| conf.setOutputValueClass(Text.class); |
| |
| conf.setMapperClass(IdentityMapper.class); |
| conf.setReducerClass(IdentityReducer.class); |
| |
| final FileSystem lfs = FileSystem.getLocal(conf); |
| |
| FileInputFormat.setInputPaths(conf, getInputDir()); |
| |
| FileOutputFormat.setOutputPath(conf, getOutputDir()); |
| |
| Path localInputRoot = getInputDir().makeQualified(lfs); |
| Path dfsInputRoot = getInputDir().makeQualified(getFileSystem()); |
| Path unqualifiedInputRoot = getInputDir(); |
| System.out.println("The qualified input dir is " + dfsInputRoot.toString()); |
| System.out.println("The unqualified input dir is " + unqualifiedInputRoot.toString()); |
| |
| Path dfsUnqualPath = new Path(unqualifiedInputRoot, "text2.txt"); |
| Path dfsQualPath = new Path(dfsInputRoot, "test2.text"); |
| Path localQualPath = new Path(localInputRoot, "test2.text"); |
| |
| System.out.println("The dfs unqualified Path is " + dfsUnqualPath); |
| System.out.println("The dfs qualified Path is " + dfsQualPath); |
| System.out.println("The local qualified path is " + localQualPath); |
| |
| DistributedCache.addCacheArchive(localQualPath.toUri(), conf); |
| DistributedCache.addCacheFile(dfsUnqualPath.toUri(), conf); |
| DistributedCache.addCacheFile(dfsQualPath.toUri(), conf); |
| |
| try { |
| RunningJob runningJob = JobClient.runJob(conf); |
| |
| assertFalse("The job completed, which is wrong since there's no local cached file", true); |
| } catch (InvalidJobConfException e) { |
| System.out.println("We expect to see a stack trace here."); |
| e.printStackTrace(System.out); |
| assertFalse("This error should not occur.", true); |
| } catch (FileNotFoundException e) { |
| System.out.println(" got an expected FileNotFoundException because we didn't provide cached files"); |
| } |
| } |
| } |