| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pig.test; |
| |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertTrue; |
| |
| import java.io.File; |
| import java.io.FileWriter; |
| import java.io.IOException; |
| import java.io.PrintWriter; |
| |
| import org.apache.hadoop.fs.FileStatus; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.pig.PigRunner; |
| import org.apache.pig.tools.pigstats.PigStats; |
| import org.apache.pig.tools.pigstats.mapreduce.MRJobStats; |
| import org.junit.AfterClass; |
| import org.junit.Assume; |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| public class TestEmptyInputDir { |
| |
| private static MiniGenericCluster cluster = MiniGenericCluster.buildCluster(); |
| private static final String EMPTY_DIR = "emptydir"; |
| private static final String INPUT_FILE = "input"; |
| private static final String OUTPUT_FILE = "output"; |
| private static final String PIG_FILE = "test.pig"; |
| |
| |
| @BeforeClass |
| public static void setUpBeforeClass() throws Exception { |
| FileSystem fs = cluster.getFileSystem(); |
| if (!fs.mkdirs(new Path(EMPTY_DIR))) { |
| throw new Exception("failed to create empty dir"); |
| } |
| PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE)); |
| w.println("1\t2"); |
| w.println("5\t3"); |
| w.close(); |
| Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE); |
| new File(INPUT_FILE).delete(); |
| } |
| |
| @AfterClass |
| public static void tearDownAfterClass() throws Exception { |
| cluster.shutDown(); |
| } |
| |
| @Test |
| public void testGroupBy() throws Exception { |
| PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); |
| w.println("A = load '" + EMPTY_DIR + "';"); |
| w.println("B = group A by $0;"); |
| w.println("store B into '" + OUTPUT_FILE + "';"); |
| w.close(); |
| |
| try { |
| String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; |
| PigStats stats = PigRunner.run(args, null); |
| |
| assertTrue(stats.isSuccessful()); |
| |
| // This assert fails on 205 due to MAPREDUCE-3606 |
| if (Util.isMapredExecType(cluster.getExecType()) |
| && !Util.isHadoop205() && !Util.isHadoop1_x()) { |
| MRJobStats js = (MRJobStats) stats.getJobGraph().getSources().get(0); |
| assertEquals(0, js.getNumberMaps()); |
| } |
| |
| //Spark doesn't create an empty result file part-*, only a _SUCCESS file if input dir was empty |
| Assume.assumeTrue("Skip this test for Spark. See PIG-5140", !Util.isSparkExecType(cluster.getExecType())); |
| assertEmptyOutputFile(); |
| } finally { |
| new File(PIG_FILE).delete(); |
| Util.deleteFile(cluster, OUTPUT_FILE); |
| } |
| } |
| |
| @Test |
| public void testSkewedJoin() throws Exception { |
| PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); |
| w.println("A = load '" + INPUT_FILE + "';"); |
| w.println("B = load '" + EMPTY_DIR + "';"); |
| w.println("C = join B by $0, A by $0 using 'skewed';"); |
| w.println("store C into '" + OUTPUT_FILE + "';"); |
| w.close(); |
| |
| try { |
| String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; |
| PigStats stats = PigRunner.run(args, null); |
| |
| assertTrue(stats.isSuccessful()); |
| |
| // This assert fails on 205 due to MAPREDUCE-3606 |
| if (Util.isMapredExecType(cluster.getExecType()) |
| && !Util.isHadoop205() && !Util.isHadoop1_x()) { |
| // the sampler job has zero maps |
| MRJobStats js = (MRJobStats) stats.getJobGraph().getSources().get(0); |
| assertEquals(0, js.getNumberMaps()); |
| } |
| |
| assertEmptyOutputFile(); |
| } finally { |
| new File(PIG_FILE).delete(); |
| Util.deleteFile(cluster, OUTPUT_FILE); |
| } |
| } |
| |
| @Test |
| public void testMergeJoin() throws Exception { |
| PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); |
| w.println("A = load '" + INPUT_FILE + "';"); |
| w.println("B = load '" + EMPTY_DIR + "';"); |
| w.println("C = join A by $0, B by $0 using 'merge';"); |
| w.println("store C into '" + OUTPUT_FILE + "';"); |
| w.close(); |
| |
| try { |
| String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; |
| PigStats stats = PigRunner.run(args, null); |
| |
| assertTrue(stats.isSuccessful()); |
| |
| // This assert fails on 205 due to MAPREDUCE-3606 |
| if (Util.isMapredExecType(cluster.getExecType()) |
| && !Util.isHadoop205() && !Util.isHadoop1_x()) { |
| // the indexer job has zero maps |
| MRJobStats js = (MRJobStats) stats.getJobGraph().getSources().get(0); |
| assertEquals(0, js.getNumberMaps()); |
| } |
| |
| assertEmptyOutputFile(); |
| } finally { |
| new File(PIG_FILE).delete(); |
| Util.deleteFile(cluster, OUTPUT_FILE); |
| } |
| } |
| |
| @Test |
| public void testFRJoin() throws Exception { |
| PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); |
| w.println("A = load '" + INPUT_FILE + "';"); |
| w.println("B = load '" + EMPTY_DIR + "';"); |
| w.println("C = join A by $0, B by $0 using 'repl';"); |
| w.println("store C into '" + OUTPUT_FILE + "';"); |
| w.close(); |
| |
| try { |
| String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; |
| PigStats stats = PigRunner.run(args, null); |
| |
| assertTrue(stats.isSuccessful()); |
| |
| // This assert fails on 205 due to MAPREDUCE-3606 |
| if (Util.isMapredExecType(cluster.getExecType()) |
| && !Util.isHadoop205() && !Util.isHadoop1_x()) { |
| MRJobStats js = (MRJobStats) stats.getJobGraph().getSources().get(0); |
| assertEquals(0, js.getNumberMaps()); |
| } |
| |
| assertEmptyOutputFile(); |
| } finally { |
| new File(PIG_FILE).delete(); |
| Util.deleteFile(cluster, OUTPUT_FILE); |
| } |
| } |
| |
| @Test |
| public void testRegularJoin() throws Exception { |
| PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); |
| w.println("A = load '" + INPUT_FILE + "';"); |
| w.println("B = load '" + EMPTY_DIR + "';"); |
| w.println("C = join B by $0, A by $0 PARALLEL 0;"); |
| w.println("store C into '" + OUTPUT_FILE + "';"); |
| w.close(); |
| |
| try { |
| String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; |
| PigStats stats = PigRunner.run(args, null); |
| |
| assertTrue(stats.isSuccessful()); |
| |
| assertEmptyOutputFile(); |
| |
| } finally { |
| new File(PIG_FILE).delete(); |
| Util.deleteFile(cluster, OUTPUT_FILE); |
| } |
| } |
| |
| @Test |
| public void testRightOuterJoin() throws Exception { |
| PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); |
| w.println("A = load '" + INPUT_FILE + "';"); |
| w.println("B = load '" + EMPTY_DIR + "' as (x:int);"); |
| w.println("C = join B by $0 right outer, A by $0;"); |
| w.println("store C into '" + OUTPUT_FILE + "';"); |
| w.close(); |
| |
| try { |
| String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; |
| PigStats stats = PigRunner.run(args, null); |
| |
| assertTrue(stats.isSuccessful()); |
| assertEquals(2, stats.getNumberRecords(OUTPUT_FILE)); |
| } finally { |
| new File(PIG_FILE).delete(); |
| Util.deleteFile(cluster, OUTPUT_FILE); |
| } |
| } |
| |
| @Test |
| public void testLeftOuterJoin() throws Exception { |
| PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); |
| w.println("A = load '" + INPUT_FILE + "' as (x:int);"); |
| w.println("B = load '" + EMPTY_DIR + "' as (x:int);"); |
| w.println("C = join B by $0 left outer, A by $0;"); |
| w.println("store C into '" + OUTPUT_FILE + "';"); |
| w.close(); |
| |
| try { |
| String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; |
| PigStats stats = PigRunner.run(args, null); |
| |
| assertTrue(stats.isSuccessful()); |
| assertEquals(0, stats.getNumberRecords(OUTPUT_FILE)); |
| } finally { |
| new File(PIG_FILE).delete(); |
| Util.deleteFile(cluster, OUTPUT_FILE); |
| } |
| } |
| |
| @Test |
| public void testBloomJoin() throws Exception { |
| PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); |
| w.println("A = load '" + INPUT_FILE + "' as (x:int);"); |
| w.println("B = load '" + EMPTY_DIR + "' as (x:int);"); |
| w.println("C = join B by $0, A by $0 using 'bloom';"); |
| w.println("D = join A by $0, B by $0 using 'bloom';"); |
| w.println("store C into '" + OUTPUT_FILE + "';"); |
| w.println("store D into 'output1';"); |
| w.close(); |
| |
| try { |
| String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; |
| PigStats stats = PigRunner.run(args, null); |
| |
| assertTrue(stats.isSuccessful()); |
| assertEquals(0, stats.getNumberRecords(OUTPUT_FILE)); |
| assertEquals(0, stats.getNumberRecords("output1")); |
| assertEmptyOutputFile(); |
| } finally { |
| new File(PIG_FILE).delete(); |
| Util.deleteFile(cluster, OUTPUT_FILE); |
| Util.deleteFile(cluster, "output1"); |
| } |
| } |
| |
| @Test |
| public void testBloomJoinOuter() throws Exception { |
| PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); |
| w.println("A = load '" + INPUT_FILE + "' as (x:int);"); |
| w.println("B = load '" + EMPTY_DIR + "' as (x:int);"); |
| w.println("C = join B by $0 left outer, A by $0 using 'bloom';"); |
| w.println("D = join A by $0 left outer, B by $0 using 'bloom';"); |
| w.println("E = join B by $0 right outer, A by $0 using 'bloom';"); |
| w.println("F = join A by $0 right outer, B by $0 using 'bloom';"); |
| w.println("store C into '" + OUTPUT_FILE + "';"); |
| w.println("store D into 'output1';"); |
| w.println("store E into 'output2';"); |
| w.println("store F into 'output3';"); |
| w.close(); |
| |
| try { |
| String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; |
| PigStats stats = PigRunner.run(args, null); |
| |
| assertTrue(stats.isSuccessful()); |
| assertEquals(0, stats.getNumberRecords(OUTPUT_FILE)); |
| assertEquals(2, stats.getNumberRecords("output1")); |
| assertEquals(2, stats.getNumberRecords("output2")); |
| assertEquals(0, stats.getNumberRecords("output3")); |
| assertEmptyOutputFile(); |
| } finally { |
| new File(PIG_FILE).delete(); |
| Util.deleteFile(cluster, OUTPUT_FILE); |
| Util.deleteFile(cluster, "output1"); |
| Util.deleteFile(cluster, "output2"); |
| Util.deleteFile(cluster, "output3"); |
| } |
| } |
| |
| private void assertEmptyOutputFile() throws IllegalArgumentException, IOException { |
| FileSystem fs = cluster.getFileSystem(); |
| FileStatus status = fs.getFileStatus(new Path(OUTPUT_FILE)); |
| assertTrue(status.isDir()); |
| assertEquals(0, status.getLen()); |
| // output directory isn't empty. Has one empty file |
| FileStatus[] files = fs.listStatus(status.getPath(), Util.getSuccessMarkerPathFilter()); |
| assertEquals(1, files.length); |
| assertEquals(0, files[0].getLen()); |
| assertTrue(files[0].getPath().getName().startsWith("part-")); |
| } |
| } |