/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;

import java.io.FileNotFoundException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URI;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
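
/**
 * Mini-cluster tests for duplicate DistributedCache entries: one case where
 * two cache URIs resolve to the same underlying file and job submission must
 * be rejected, and one where similar-looking URIs are legitimately distinct.
 */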
public class TestDuplicateArchiveFileCachedURLMinicluster extends ClusterMapReduceTestCase {

  enum EnumCounter { MAP_RECORDS }
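
  /**
   * Adds the same DFS file to the distributed cache twice, once as an
   * archive (via the qualified URI) and once as a file (via the unqualified
   * URI). Job submission should be rejected with an InvalidJobConfException
   * because both entries resolve to the same path.
   */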
  public void testDuplicationsMinicluster() throws Exception {
    // Create a small input file on the cluster's file system.
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.write("hello4\n");
    wr.close();
    // Configure a pass-through job (identity mapper and reducer) so that
    // any failure comes from the cache configuration, not the job itself.
    JobConf conf = createJobConf();
    conf.setJobName("counters");
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    FileInputFormat.setInputPaths(conf, getInputDir());
    FileOutputFormat.setOutputPath(conf, getOutputDir());
    Path inputRoot = getInputDir().makeQualified(getFileSystem());
    Path unqualifiedInputRoot = getInputDir();
    System.out.println("The qualified input dir is " + inputRoot);
    System.out.println("The unqualified input dir is " + unqualifiedInputRoot);

    // The same file, referenced once through the qualified path and once
    // through the unqualified path: both URIs resolve to the same DFS file.
    Path duplicatedPath = new Path(inputRoot, "text.txt");
    URI duplicatedURI = duplicatedPath.toUri();
    Path unqualifiedDuplicatedPath = new Path(unqualifiedInputRoot, "text.txt");
    URI unqualifiedDuplicatedURI = unqualifiedDuplicatedPath.toUri();
    System.out.println("The duplicated Path is " + duplicatedPath);
    System.out.println("The duplicated URI is " + duplicatedURI);
    System.out.println("The unqualified duplicated URI is " + unqualifiedDuplicatedURI);
    DistributedCache.addCacheArchive(duplicatedURI, conf);
    DistributedCache.addCacheFile(unqualifiedDuplicatedURI, conf);

    try {
      JobClient.runJob(conf);
      fail("The job completed, which is wrong since there's a duplication");
    } catch (InvalidJobConfException e) {
      System.out.println("We expect to see a stack trace here.");
      e.printStackTrace(System.out);
    }
  }
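
  /**
   * Registers cache entries whose paths look alike but qualify to different
   * file systems and file names, so the duplication check must not reject
   * them. The job is still expected to fail, with a FileNotFoundException,
   * because the "test2.text" entries were never actually created.
   */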
  public void testApparentDuplicationsMinicluster() throws Exception {
    // Create a small input file on the cluster's file system.
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text2.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.write("hello4\n");
    wr.close();
    // Same pass-through job configuration as above.
    JobConf conf = createJobConf();
    conf.setJobName("counters");
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    final FileSystem lfs = FileSystem.getLocal(conf);

    FileInputFormat.setInputPaths(conf, getInputDir());
    FileOutputFormat.setOutputPath(conf, getOutputDir());
    Path localInputRoot = getInputDir().makeQualified(lfs);
    Path dfsInputRoot = getInputDir().makeQualified(getFileSystem());
    Path unqualifiedInputRoot = getInputDir();
    System.out.println("The qualified input dir is " + dfsInputRoot);
    System.out.println("The unqualified input dir is " + unqualifiedInputRoot);

    // Note the deliberate name mismatch: the file that exists is
    // "text2.txt", while the qualified paths point at "test2.text", which
    // was never created. The three URIs therefore name three distinct
    // resources and must not trip the duplicate-entry check.
    Path dfsUnqualPath = new Path(unqualifiedInputRoot, "text2.txt");
    Path dfsQualPath = new Path(dfsInputRoot, "test2.text");
    Path localQualPath = new Path(localInputRoot, "test2.text");
    System.out.println("The dfs unqualified Path is " + dfsUnqualPath);
    System.out.println("The dfs qualified Path is " + dfsQualPath);
    System.out.println("The local qualified path is " + localQualPath);
    DistributedCache.addCacheArchive(localQualPath.toUri(), conf);
    DistributedCache.addCacheFile(dfsUnqualPath.toUri(), conf);
    DistributedCache.addCacheFile(dfsQualPath.toUri(), conf);

    try {
      JobClient.runJob(conf);
      fail("The job completed, which is wrong since there's no local cached file");
    } catch (InvalidJobConfException e) {
      System.out.println("We expect to see a stack trace here.");
      e.printStackTrace(System.out);
      fail("This error should not occur.");
    } catch (FileNotFoundException e) {
      System.out.println("Got an expected FileNotFoundException because we didn't provide the cached files.");
    }
  }
}