/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.util.Iterator;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.test.utils.TestHelper;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.PigStats.JobGraph;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestFRJoin2 {
private static MiniCluster cluster = MiniCluster.buildCluster();
private static final String INPUT_DIR = "frjoin";
private static final String INPUT_FILE = "input";
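    // values passed to MRCompiler.FILE_CONCATENATION_THRESHOLD below:
    // FILE_MERGE_THRESHOLD matches the number of part files created in
    // setUpBeforeClass() to exercise the boundary case, while
    // MIN_FILE_MERGE_THRESHOLD forces concatenation for any multi-file input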
private static final int FILE_MERGE_THRESHOLD = 5;
private static final int MIN_FILE_MERGE_THRESHOLD = 1;
    // contents of the input directory, joined into a comma-separated list
    private static String concatINPUT_DIR = null;
    private File logFile;

@BeforeClass
public static void setUpBeforeClass() throws Exception {
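        // create FILE_MERGE_THRESHOLD part files under INPUT_DIR, each holding
        // LOOP_SIZE * LOOP_SIZE tab-separated rows, plus a single small file
        // INPUT_FILE; concatINPUT_DIR collects the part-file paths into a
        // comma-separated LOAD location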
StringBuilder strBuilder = new StringBuilder();
FileSystem fs = cluster.getFileSystem();
fs.mkdirs(new Path(INPUT_DIR));
int LOOP_SIZE = 2;
for (int i=0; i<FILE_MERGE_THRESHOLD; i++) {
String[] input = new String[2*LOOP_SIZE];
for (int n=0; n<LOOP_SIZE; n++) {
for (int j=0; j<LOOP_SIZE;j++) {
input[n*LOOP_SIZE + j] = i + "\t" + (j + n);
}
}
String newFile = INPUT_DIR + "/part-0000" + i;
Util.createInputFile(cluster, newFile, input);
strBuilder.append(newFile);
strBuilder.append(",");
}
strBuilder.deleteCharAt(strBuilder.length() - 1);
concatINPUT_DIR = strBuilder.toString();
String[] input2 = new String[2*(LOOP_SIZE/2)];
int k = 0;
for (int i=1; i<=LOOP_SIZE/2; i++) {
String si = i + "";
for (int j=0; j<=LOOP_SIZE/2; j++) {
input2[k++] = si + "\t" + j;
}
}
Util.createInputFile(cluster, INPUT_FILE, input2);
    }

@AfterClass
public static void tearDownAfterClass() throws Exception {
cluster.shutDown();
    }

// test simple scalar alias with file concatenation following
// a MapReduce job
@Test
public void testConcatenateJobForScalar() throws Exception {
        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        // group by $0*0 instead of group-all, because group-all sets parallelism to 1
pigServer.registerQuery("B = group A by $0*0 parallel 5;");
pigServer.registerQuery("C = foreach B generate COUNT(A) as count, MAX(A.y) as max;");
DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
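
        // first run: with the concatenation threshold met (C has 5 part files
        // from "parallel 5"), the compiler inserts an extra map-only job to
        // merge them, growing the job graph from 2 to 3 jobs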
{
pigServer.getPigContext().getProperties().setProperty(
MRCompiler.FILE_CONCATENATION_THRESHOLD, String.valueOf(FILE_MERGE_THRESHOLD));
pigServer.registerQuery("D= foreach A generate x / C.count, C.max - y;");
Iterator<Tuple> iter = pigServer.openIterator("D");
while(iter.hasNext()) {
dbfrj.add(iter.next());
}
JobGraph jGraph = PigStats.get().getJobGraph();
assertEquals(3, jGraph.size());
// find added map-only concatenate job
JobStats js = (JobStats)jGraph.getSuccessors(jGraph.getSources().get(0)).get(0);
assertEquals(1, js.getNumberMaps());
assertEquals(0, js.getNumberReduces());
}
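
        // baseline run: disabling split combination also disables the inserted
        // concatenation job, so the original two-job plan runs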
{
pigServer.getPigContext().getProperties().setProperty(
"pig.noSplitCombination", "true");
pigServer.registerQuery("D= foreach A generate x / C.count, C.max - y;");
Iterator<Tuple> iter = pigServer.openIterator("D");
while(iter.hasNext()) {
dbshj.add(iter.next());
}
assertEquals(2, PigStats.get().getJobGraph().size());
}
assertEquals(dbfrj.size(), dbshj.size());
        assertTrue(TestHelper.compareBags(dbfrj, dbshj));
    }

// test simple scalar alias with file concatenation following
// a Map-only job
@Test
public void testConcatenateJobForScalar2() throws Exception {
logFile = Util.resetLog(MRCompiler.class, logFile);
        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
pigServer.registerQuery("B = LOAD '" + INPUT_DIR + "/{part-00*}" +"' as (x:int,y:int);");
pigServer.registerQuery("C = filter B by (x == 3) AND (y == 2);");
DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
{
pigServer.getPigContext().getProperties().setProperty(
MRCompiler.FILE_CONCATENATION_THRESHOLD, String.valueOf(MIN_FILE_MERGE_THRESHOLD));
pigServer.registerQuery("D = foreach A generate x / C.x, y + C.y;");
Iterator<Tuple> iter = pigServer.openIterator("D");
while(iter.hasNext()) {
dbfrj.add(iter.next());
}
JobGraph jGraph = PigStats.get().getJobGraph();
assertEquals(3, jGraph.size());
// find added map-only concatenate job
JobStats js = (JobStats)jGraph.getSuccessors(jGraph.getSources().get(0)).get(0);
assertEquals(1, js.getNumberMaps());
assertEquals(0, js.getNumberReduces());
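            // the compiler should have been able to count the input files, so
            // assert that neither failure message appears in the log (the
            // false flag asserts absence)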
Util.checkLogFileMessage(logFile,
new String[] {"number of input files: 0", "failed to get number of input files"},
false
);
}
{
pigServer.getPigContext().getProperties().setProperty(
"pig.noSplitCombination", "true");
pigServer.registerQuery("D = foreach A generate x / C.x, y + C.y;");
Iterator<Tuple> iter = pigServer.openIterator("D");
while(iter.hasNext()) {
dbshj.add(iter.next());
}
assertEquals(2, PigStats.get().getJobGraph().size());
}
assertEquals(dbfrj.size(), dbshj.size());
        assertTrue(TestHelper.compareBags(dbfrj, dbshj));
    }

// test scalar alias with file concatenation following
// a multi-query job
@Test
public void testConcatenateJobForScalar3() throws Exception {
        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
pigServer.registerQuery("C = group A all parallel 5;");
pigServer.registerQuery("D = foreach C generate COUNT(A) as count;");
pigServer.registerQuery("E = foreach C generate MAX(A.x) as max;");
DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
{
pigServer.getPigContext().getProperties().setProperty(
MRCompiler.FILE_CONCATENATION_THRESHOLD, String.valueOf(MIN_FILE_MERGE_THRESHOLD));
pigServer.registerQuery("F = foreach B generate x / D.count, y + E.max;");
Iterator<Tuple> iter = pigServer.openIterator("F");
while(iter.hasNext()) {
dbfrj.add(iter.next());
}
JobGraph jGraph = PigStats.get().getJobGraph();
assertEquals(4, jGraph.size());
}
{
pigServer.getPigContext().getProperties().setProperty(
"pig.noSplitCombination", "true");
pigServer.registerQuery("F = foreach B generate x / D.count, y + E.max;");
Iterator<Tuple> iter = pigServer.openIterator("F");
while(iter.hasNext()) {
dbshj.add(iter.next());
}
assertEquals(2, PigStats.get().getJobGraph().size());
}
assertEquals(dbfrj.size(), dbshj.size());
        assertTrue(TestHelper.compareBags(dbfrj, dbshj));
}
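
    // test FR join with file concatenation of the multi-file replicated input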
@Test
public void testConcatenateJobForFRJoin() throws Exception {
logFile = Util.resetLog(MRCompiler.class, logFile);
        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
pigServer.registerQuery("B = LOAD '" + INPUT_DIR + "/{part-00*}" + "' as (x:int,y:int);");
DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
{
pigServer.getPigContext().getProperties().setProperty(
MRCompiler.FILE_CONCATENATION_THRESHOLD, String.valueOf(MIN_FILE_MERGE_THRESHOLD));
pigServer.registerQuery("C = join A by y, B by y using 'repl';");
Iterator<Tuple> iter = pigServer.openIterator("C");
while(iter.hasNext()) {
dbfrj.add(iter.next());
}
assertEquals(3, PigStats.get().getJobGraph().size());
Util.checkLogFileMessage(logFile,
new String[] {"number of input files: 0", "failed to get number of input files"},
false
);
}
{
pigServer.getPigContext().getProperties().setProperty(
"pig.noSplitCombination", "true");
pigServer.registerQuery("C = join A by y, B by y using 'repl';");
Iterator<Tuple> iter = pigServer.openIterator("C");
while(iter.hasNext()) {
dbshj.add(iter.next());
}
assertEquals(2, PigStats.get().getJobGraph().size());
}
assertEquals(dbfrj.size(), dbshj.size());
        assertTrue(TestHelper.compareBags(dbfrj, dbshj));
}
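
    // test that a replicated input produced by a job with parallelism equal to
    // FILE_MERGE_THRESHOLD gets a concatenation job before the FR join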
@Test
public void testTooManyReducers() throws Exception {
        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
pigServer.registerQuery("B = group A by x parallel " + FILE_MERGE_THRESHOLD + ";");
pigServer.registerQuery("C = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
{
pigServer.getPigContext().getProperties().setProperty(
MRCompiler.FILE_CONCATENATION_THRESHOLD, String.valueOf(FILE_MERGE_THRESHOLD));
pigServer.registerQuery("D = join C by $0, B by $0 using 'repl';");
Iterator<Tuple> iter = pigServer.openIterator("D");
while(iter.hasNext()) {
Tuple t = iter.next();
dbfrj.add(t);
}
JobGraph jGraph = PigStats.get().getJobGraph();
assertEquals(3, jGraph.size());
// find added map-only concatenate job
JobStats js = (JobStats)jGraph.getSuccessors(jGraph.getSources().get(0)).get(0);
assertEquals(1, js.getNumberMaps());
assertEquals(0, js.getNumberReduces());
}
{
pigServer.getPigContext().getProperties().setProperty(
"pig.noSplitCombination", "true");
pigServer.registerQuery("D = join C by $0, B by $0 using 'repl';");
Iterator<Tuple> iter = pigServer.openIterator("D");
while(iter.hasNext()) {
Tuple t = iter.next();
dbshj.add(t);
}
assertEquals(2, PigStats.get().getJobGraph().size());
}
assertEquals(dbfrj.size(), dbshj.size());
        assertTrue(TestHelper.compareBags(dbfrj, dbshj));
}
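
    // test file concatenation when the number of maps is unknown at compile
    // time: A is loaded from a comma-separated list of paths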
@Test
public void testUnknownNumMaps() throws Exception {
PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("A = LOAD '" + concatINPUT_DIR + "' as (x:int,y:int);");
pigServer.registerQuery("B = Filter A by x < 50;");
DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
{
pigServer.getPigContext().getProperties().setProperty(
MRCompiler.FILE_CONCATENATION_THRESHOLD, String.valueOf(MIN_FILE_MERGE_THRESHOLD));
pigServer.registerQuery("C = join A by $0, B by $0 using 'repl';");
Iterator<Tuple> iter = pigServer.openIterator("C");
while(iter.hasNext()) {
dbfrj.add(iter.next());
}
JobGraph jGraph = PigStats.get().getJobGraph();
assertEquals(3, jGraph.size());
}
{
pigServer.getPigContext().getProperties().setProperty(
"pig.noSplitCombination", "true");
pigServer.registerQuery("C = join A by $0, B by $0 using 'repl';");
Iterator<Tuple> iter = pigServer.openIterator("C");
while(iter.hasNext()) {
dbshj.add(iter.next());
}
assertEquals(2, PigStats.get().getJobGraph().size());
}
assertEquals(dbfrj.size(), dbshj.size());
        assertTrue(TestHelper.compareBags(dbfrj, dbshj));
}
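
    // test file concatenation when an input is itself the output of an FR
    // join, so the number of maps is unknown at compile time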
@Test
public void testUnknownNumMaps2() throws Exception {
PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("A = LOAD '" + INPUT_DIR + "' as (x:int,y:int);");
pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
pigServer.registerQuery("C = join A by x, B by x using 'repl';");
DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
{
pigServer.getPigContext().getProperties().setProperty(
MRCompiler.FILE_CONCATENATION_THRESHOLD, String.valueOf(MIN_FILE_MERGE_THRESHOLD));
pigServer.registerQuery("D = join B by $0, C by $0 using 'repl';");
Iterator<Tuple> iter = pigServer.openIterator("D");
while(iter.hasNext()) {
dbfrj.add(iter.next());
}
JobGraph jGraph = PigStats.get().getJobGraph();
assertEquals(5, jGraph.size());
}
{
pigServer.getPigContext().getProperties().setProperty(
"pig.noSplitCombination", "true");
pigServer.registerQuery("D = join B by $0, C by $0 using 'repl';");
Iterator<Tuple> iter = pigServer.openIterator("D");
while(iter.hasNext()) {
dbshj.add(iter.next());
}
assertEquals(3, PigStats.get().getJobGraph().size());
}
assertEquals(dbfrj.size(), dbshj.size());
        assertTrue(TestHelper.compareBags(dbfrj, dbshj));
}
}